
### ALGERIA 2016
# Reads the raw STEPS export, keeps the 32 harmonised columns, recodes them to
# the project conventions and writes the cleaned CSV.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("dza2016.csv") # modify according to the STEPS file name

### EXTRACT DATA
# with() resolves names inside `data` first, so a stray global called `sex`,
# `age`, ... cannot shadow a survey column, and nothing is left on the search
# path if a column is missing and data.frame() fails (unlike attach/detach).
df <- with(data, data.frame(
  study_id  = "DZA_2016_STEPS_v01", # STEPS id
  country   = "Algeria",            # full country name, first letter capital
  region    = "Africa",             # full WHO region name
  data_year = 2017,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = 69, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  # NOTE(review): assumes stratum code 1 means urban - confirm in the codebook.
  is_urban       = stratum,
  is_pregnant    = m8,

  weight = m12, # kg
  height = m11, # cm here, converted to metres below
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))

### CHECK AND RECODE
# Local helpers, removed again at the end of this section.
# 1 stays 1, any other non-missing code becomes 0, NA stays NA.
to_binary <- function(x) ifelse(x == 1, 1, 0)
# Values <= 0 or >= `upper` become NA; everything else is kept.
drop_outside <- function(x, upper) ifelse(x <= 0 | x >= upper, NA, x)

## DEMOGRAPHIC VARIABLES
# Always verify the raw coding first: the result is men = 1, everyone else = 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units: height arrives in cm and is stored in metres.
df$height <- df$height / 100

table(df$is_pregnant)
df[df == "Inf"] <- NA # "Inf" means missing
# Target coding: 0 = not pregnant, 1 = pregnant; men are always 0.
df$is_pregnant <- to_binary(df$is_pregnant)
df$is_pregnant <- ifelse(df$sex == 1, 0, df$is_pregnant)
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding: urban = 1, rural = 0
df$is_urban <- to_binary(df$is_urban)
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 sentinel codes become NA.
df$waist  <- drop_outside(df$waist, 888)
df$weight <- drop_outside(df$weight, 888)
# Height is already in metres, so the sentinels are now 8.88 / 9.99; a 5 m
# cut-off removes them together with any other impossible height.
df$height <- drop_outside(df$height, 5)

## BP
table(df$bp_measured)
table(df$self_hyper)
df$self_hyper  <- to_binary(df$self_hyper)
df$bp_measured <- to_binary(df$bp_measured)
# Never measured -> cannot have been told they have hypertension.
df$self_hyper <- ifelse(df$bp_measured == 0, 0, df$self_hyper)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df$drug_hyper <- to_binary(df$drug_hyper)
# No reported diagnosis -> no treatment.
df$drug_hyper <- ifelse(df$self_hyper == 0, 0, df$drug_hyper)

# Blood pressure readings: sentinel codes (>= 888) become NA.
bp_cols <- c("sbp1", "sbp2", "sbp3", "dbp1", "dbp2", "dbp3")
df[bp_cols] <- lapply(df[bp_cols], function(x) ifelse(x >= 888, NA, x))

## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
df$self_diabetes    <- to_binary(df$self_diabetes)
df$glucose_measured <- to_binary(df$glucose_measured)
df$self_diabetes <- ifelse(df$glucose_measured == 0, 0, df$self_diabetes)
table(df$drug_diabetes, useNA = c("always"))
df$drug_diabetes <- to_binary(df$drug_diabetes)
df$drug_diabetes <- ifelse(df$self_diabetes == 0, 0, df$drug_diabetes)

### FINAL INSPECTION
# View() needs an interactive session; skip it so a batch run still saves.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify that no abnormal values are left

### SAVE FINAL DATASET
# The result has 32 columns and the same number of rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name: country full name and year, for example "Afghanistan_2018.csv".
write.csv(df, "Algeria_2017.csv", row.names = FALSE)

rm(data, df, to_binary, drop_outside, bp_cols)




### AMERICAN SAMOA 2004
# Reads the raw STEPS export, keeps the 32 harmonised columns, recodes them to
# the project conventions and writes the cleaned CSV.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("asm2004.csv") # modify according to the STEPS file name

### EXTRACT DATA
# with() resolves names inside `data` first, so a stray global called `sex`,
# `age`, ... cannot shadow a survey column, and nothing is left on the search
# path if a column is missing and data.frame() fails (unlike attach/detach).
df <- with(data, data.frame(
  study_id  = "ASM_2004_STEPS_v01", # STEPS id
  country   = "American Samoa",     # full country name, first letter capital
  region    = "Western Pacific",    # full WHO region name
  data_year = 2004,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable mapped for this survey
  # NOTE(review): is_pregnant and waist are both read from m8, which cannot be
  # right for both. The Bahamas/Barbados sections of this file, which share
  # the m3/m4/m11a layout, use m5 for pregnancy and m7 for waist - confirm
  # against the American Samoa codebook and fix the wrong mapping.
  is_pregnant    = m8,

  weight = m4, # kg
  height = m3, # cm here, converted to metres below
  waist  = m8,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  # No self-report variables are mapped for this survey.
  bp_measured      = NA,
  self_hyper       = NA,
  drug_hyper       = NA,
  glucose_measured = NA,
  self_diabetes    = NA,
  drug_diabetes    = NA
))

### CHECK AND RECODE
# Local helpers, removed again at the end of this section.
# 1 stays 1, any other non-missing code becomes 0, NA stays NA.
to_binary <- function(x) ifelse(x == 1, 1, 0)
# Values <= 0 or >= `upper` become NA; everything else is kept.
drop_outside <- function(x, upper) ifelse(x <= 0 | x >= upper, NA, x)

## DEMOGRAPHIC VARIABLES
# Always verify the raw coding first: the result is men = 1, everyone else = 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units: height arrives in cm and is stored in metres.
df$height <- df$height / 100

table(df$is_pregnant)
df[df == "Inf"] <- NA # "Inf" means missing
# Target coding: 0 = not pregnant, 1 = pregnant; men are always 0.
df$is_pregnant <- to_binary(df$is_pregnant)
df$is_pregnant <- ifelse(df$sex == 1, 0, df$is_pregnant)
table(df$is_pregnant, df$sex)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 sentinel codes become NA.
df$waist  <- drop_outside(df$waist, 888)
df$weight <- drop_outside(df$weight, 888)
# Height is already in metres, so the sentinels are now 8.88 / 9.99; a 5 m
# cut-off removes them together with any other impossible height.
df$height <- drop_outside(df$height, 5)

## BP
# Blood pressure readings: sentinel codes (>= 888) become NA.
bp_cols <- c("sbp1", "sbp2", "sbp3", "dbp1", "dbp2", "dbp3")
df[bp_cols] <- lapply(df[bp_cols], function(x) ifelse(x >= 888, NA, x))

### FINAL INSPECTION
# View() needs an interactive session; skip it so a batch run still saves.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify that no abnormal values are left

### SAVE FINAL DATASET
# The result has 32 columns and the same number of rows as `data`.
# NOTE(review): this output folder has an extra "Data/" level compared with
# the other sections of this file - confirm that is intended.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all") # modify according to your PC
# File name: country full name and year, for example "Afghanistan_2018.csv".
write.csv(df, "American_samoa_2004.csv", row.names = FALSE)

rm(data, df, to_binary, drop_outside, bp_cols)





### BAHAMAS
# Reads the raw STEPS export, keeps the 32 harmonised columns, recodes them to
# the project conventions and writes the cleaned CSV.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("bhs2011.csv") # modify according to the STEPS file name

### EXTRACT DATA
# with() resolves names inside `data` first, so a stray global called `sex`,
# `age`, ... cannot shadow a survey column, and nothing is left on the search
# path if a column is missing and data.frame() fails (unlike attach/detach).
df <- with(data, data.frame(
  study_id  = "BHS_2011_STEPS_v01", # STEPS id
  country   = "Bahamas",            # full country name, first letter capital
  region    = "Americas",           # full WHO region name
  data_year = 2012,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = NA, # no step 3 weight mapped for this survey

  # NOTE(review): the other surveys in this file with a maximum age of 64
  # start at 25 - confirm that 24 is really the youngest target age here.
  survey_min_age = 24, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable mapped for this survey
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm here, converted to metres below
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))

### CHECK AND RECODE
# Local helpers, removed again at the end of this section.
# 1 stays 1, any other non-missing code becomes 0, NA stays NA.
to_binary <- function(x) ifelse(x == 1, 1, 0)
# Values <= 0 or >= `upper` become NA; everything else is kept.
drop_outside <- function(x, upper) ifelse(x <= 0 | x >= upper, NA, x)

## DEMOGRAPHIC VARIABLES
# Always verify the raw coding first: the result is men = 1, everyone else = 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units: height arrives in cm and is stored in metres.
df$height <- df$height / 100

table(df$is_pregnant)
df[df == "Inf"] <- NA # "Inf" means missing
# Target coding: 0 = not pregnant, 1 = pregnant; men are always 0.
df$is_pregnant <- to_binary(df$is_pregnant)
df$is_pregnant <- ifelse(df$sex == 1, 0, df$is_pregnant)
table(df$is_pregnant, df$sex)

table(df$is_urban) # all NA here; recode urban = 1, rural = 0 once mapped

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 sentinel codes become NA.
df$waist  <- drop_outside(df$waist, 888)
df$weight <- drop_outside(df$weight, 888)
# Height is already in metres, so the sentinels are now 8.88 / 9.99; a 5 m
# cut-off removes them together with any other impossible height.
df$height <- drop_outside(df$height, 5)

## BP
table(df$bp_measured)
table(df$self_hyper)
df$self_hyper  <- to_binary(df$self_hyper)
df$bp_measured <- to_binary(df$bp_measured)
# Never measured -> cannot have been told they have hypertension.
df$self_hyper <- ifelse(df$bp_measured == 0, 0, df$self_hyper)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df$drug_hyper <- to_binary(df$drug_hyper)
# No reported diagnosis -> no treatment.
df$drug_hyper <- ifelse(df$self_hyper == 0, 0, df$drug_hyper)

# Blood pressure readings: sentinel codes (>= 888) become NA.
bp_cols <- c("sbp1", "sbp2", "sbp3", "dbp1", "dbp2", "dbp3")
df[bp_cols] <- lapply(df[bp_cols], function(x) ifelse(x >= 888, NA, x))

## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
df$self_diabetes    <- to_binary(df$self_diabetes)
df$glucose_measured <- to_binary(df$glucose_measured)
df$self_diabetes <- ifelse(df$glucose_measured == 0, 0, df$self_diabetes)
table(df$drug_diabetes, useNA = c("always"))
df$drug_diabetes <- to_binary(df$drug_diabetes)
df$drug_diabetes <- ifelse(df$self_diabetes == 0, 0, df$drug_diabetes)

### FINAL INSPECTION
# View() needs an interactive session; skip it so a batch run still saves.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify that no abnormal values are left

### SAVE FINAL DATASET
# The result has 32 columns and the same number of rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name: country full name and year, for example "Afghanistan_2018.csv".
write.csv(df, "Bahamas_2012.csv", row.names = FALSE)

rm(data, df, to_binary, drop_outside, bp_cols)




### BAHAMAS (duplicate section removed)
# This section was a line-for-line copy of the Bahamas section directly above:
# it read the same "bhs2011.csv", applied the same recodes and overwrote the
# same "Bahamas_2012.csv" with identical content, so running it added nothing.
# NOTE(review): if it was meant to process a different survey, add that
# section here with its own input file, study_id and output file name.





### BARBADOS
# Reads the raw STEPS export, keeps the 32 harmonised columns, recodes them to
# the project conventions and writes the cleaned CSV.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("brb2007 (1).csv") # modify according to the STEPS file name

### EXTRACT DATA
# with() resolves names inside `data` first, so a stray global called `sex`,
# `age`, ... cannot shadow a survey column, and nothing is left on the search
# path if a column is missing and data.frame() fails (unlike attach/detach).
df <- with(data, data.frame(
  study_id  = "BRB_2007_STEPS_v01", # STEPS id
  country   = "Barbados",           # full country name, first letter capital
  region    = "Americas",           # full WHO region name
  data_year = 2007,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  # NOTE(review): no oldest target age is recorded - fill in if known.
  survey_max_age = NA,

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable mapped for this survey
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm here, converted to metres below
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured      = h1a,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6a,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))

### CHECK AND RECODE
# Local helpers, removed again at the end of this section.
# 1 stays 1, any other non-missing code becomes 0, NA stays NA.
to_binary <- function(x) ifelse(x == 1, 1, 0)
# Values <= 0 or >= `upper` become NA; everything else is kept.
drop_outside <- function(x, upper) ifelse(x <= 0 | x >= upper, NA, x)

## DEMOGRAPHIC VARIABLES
# Always verify the raw coding first: the result is men = 1, everyone else = 2.
table(df$sex)
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units: height arrives in cm and is stored in metres.
df$height <- df$height / 100

table(df$is_pregnant)
df[df == "Inf"] <- NA # "Inf" means missing
# Target coding: 0 = not pregnant, 1 = pregnant; men are always 0.
df$is_pregnant <- to_binary(df$is_pregnant)
df$is_pregnant <- ifelse(df$sex == 1, 0, df$is_pregnant)
table(df$is_pregnant, df$sex)

table(df$is_urban) # all NA here; recode urban = 1, rural = 0 once mapped

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 sentinel codes become NA.
df$waist  <- drop_outside(df$waist, 888)
df$weight <- drop_outside(df$weight, 888)
# Height is already in metres, so the sentinels are now 8.88 / 9.99; a 5 m
# cut-off removes them together with any other impossible height.
df$height <- drop_outside(df$height, 5)

## BP
table(df$bp_measured)
table(df$self_hyper)
df$self_hyper  <- to_binary(df$self_hyper)
df$bp_measured <- to_binary(df$bp_measured)
# Never measured -> cannot have been told they have hypertension.
df$self_hyper <- ifelse(df$bp_measured == 0, 0, df$self_hyper)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df$drug_hyper <- to_binary(df$drug_hyper)
# No reported diagnosis -> no treatment.
df$drug_hyper <- ifelse(df$self_hyper == 0, 0, df$drug_hyper)

# Blood pressure readings: sentinel codes (>= 888) become NA.
bp_cols <- c("sbp1", "sbp2", "sbp3", "dbp1", "dbp2", "dbp3")
df[bp_cols] <- lapply(df[bp_cols], function(x) ifelse(x >= 888, NA, x))

## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
df$self_diabetes    <- to_binary(df$self_diabetes)
df$glucose_measured <- to_binary(df$glucose_measured)
df$self_diabetes <- ifelse(df$glucose_measured == 0, 0, df$self_diabetes)
table(df$drug_diabetes, useNA = c("always"))
df$drug_diabetes <- to_binary(df$drug_diabetes)
df$drug_diabetes <- ifelse(df$self_diabetes == 0, 0, df$drug_diabetes)

### FINAL INSPECTION
# View() needs an interactive session; skip it so a batch run still saves.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify that no abnormal values are left

### SAVE FINAL DATASET
# The result has 32 columns and the same number of rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name: country full name and year, for example "Afghanistan_2018.csv".
write.csv(df, "Barbados_2007.csv", row.names = FALSE)

rm(data, df, to_binary, drop_outside, bp_cols)




### BENIN
# Reads the raw STEPS export, keeps the 32 harmonised columns, recodes them to
# the project conventions and writes the cleaned CSV.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("ben2015.csv") # modify according to the STEPS file name

### EXTRACT DATA
# with() resolves names inside `data` first, so a stray global called `sex`,
# `age`, ... cannot shadow a survey column, and nothing is left on the search
# path if a column is missing and data.frame() fails (unlike attach/detach).
df <- with(data, data.frame(
  study_id  = "BEN_2015_STEPS_v01", # STEPS id
  country   = "Benin",              # full country name, first letter capital
  region    = "Africa",             # full WHO region name
  data_year = 2015,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = 69, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  # NOTE(review): assumes stratum code 1 means urban - confirm in the codebook.
  is_urban       = stratum,
  is_pregnant    = m8,

  weight = m12, # kg
  height = m11, # cm here, converted to metres below
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))

### CHECK AND RECODE
# Local helpers, removed again at the end of this section.
# 1 stays 1, any other non-missing code becomes 0, NA stays NA.
to_binary <- function(x) ifelse(x == 1, 1, 0)
# Values <= 0 or >= `upper` become NA; everything else is kept.
drop_outside <- function(x, upper) ifelse(x <= 0 | x >= upper, NA, x)

## DEMOGRAPHIC VARIABLES
# Always verify the raw coding first: the result is men = 1, everyone else = 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units: height arrives in cm and is stored in metres.
df$height <- df$height / 100

table(df$is_pregnant)
df[df == "Inf"] <- NA # "Inf" means missing
# Target coding: 0 = not pregnant, 1 = pregnant; men are always 0.
df$is_pregnant <- to_binary(df$is_pregnant)
df$is_pregnant <- ifelse(df$sex == 1, 0, df$is_pregnant)
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding: urban = 1, rural = 0
df$is_urban <- to_binary(df$is_urban)
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 sentinel codes become NA.
df$waist  <- drop_outside(df$waist, 888)
df$weight <- drop_outside(df$weight, 888)
# Height is already in metres, so the sentinels are now 8.88 / 9.99; a 5 m
# cut-off removes them together with any other impossible height.
df$height <- drop_outside(df$height, 5)

## BP
table(df$bp_measured)
table(df$self_hyper)
df$self_hyper  <- to_binary(df$self_hyper)
df$bp_measured <- to_binary(df$bp_measured)
# Never measured -> cannot have been told they have hypertension.
df$self_hyper <- ifelse(df$bp_measured == 0, 0, df$self_hyper)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df$drug_hyper <- to_binary(df$drug_hyper)
# No reported diagnosis -> no treatment.
df$drug_hyper <- ifelse(df$self_hyper == 0, 0, df$drug_hyper)

# Blood pressure readings: sentinel codes (>= 888) become NA.
bp_cols <- c("sbp1", "sbp2", "sbp3", "dbp1", "dbp2", "dbp3")
df[bp_cols] <- lapply(df[bp_cols], function(x) ifelse(x >= 888, NA, x))

## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
df$self_diabetes    <- to_binary(df$self_diabetes)
df$glucose_measured <- to_binary(df$glucose_measured)
df$self_diabetes <- ifelse(df$glucose_measured == 0, 0, df$self_diabetes)
table(df$drug_diabetes, useNA = c("always"))
df$drug_diabetes <- to_binary(df$drug_diabetes)
df$drug_diabetes <- ifelse(df$self_diabetes == 0, 0, df$drug_diabetes)

### FINAL INSPECTION
# View() needs an interactive session; skip it so a batch run still saves.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify that no abnormal values are left

### SAVE FINAL DATASET
# The result has 32 columns and the same number of rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name: country full name and year, for example "Afghanistan_2018.csv".
write.csv(df, "Benin_2015.csv", row.names = FALSE)

rm(data, df, to_binary, drop_outside, bp_cols)




### BOTSWANA
# Reads the raw STEPS export, keeps the 32 harmonised columns, recodes them to
# the project conventions and writes the cleaned CSV.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("bwa2014.csv") # modify according to the STEPS file name

### EXTRACT DATA
# with() resolves names inside `data` first, so a stray global called `sex`,
# `age`, ... cannot shadow a survey column, and nothing is left on the search
# path if a column is missing and data.frame() fails (unlike attach/detach).
df <- with(data, data.frame(
  study_id  = "BWA_2014_STEPS_v01", # STEPS id
  country   = "Botswana",           # full country name, first letter capital
  region    = "Africa",             # full WHO region name
  data_year = 2014,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = 69, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable mapped for this survey
  is_pregnant    = m8,

  weight = m12, # kg
  height = m11, # cm here, converted to metres below
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))

### CHECK AND RECODE
# Local helpers, removed again at the end of this section.
# 1 stays 1, any other non-missing code becomes 0, NA stays NA.
to_binary <- function(x) ifelse(x == 1, 1, 0)
# Values <= 0 or >= `upper` become NA; everything else is kept.
drop_outside <- function(x, upper) ifelse(x <= 0 | x >= upper, NA, x)

## DEMOGRAPHIC VARIABLES
# Always verify the raw coding first: the result is men = 1, everyone else = 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units: height arrives in cm and is stored in metres.
df$height <- df$height / 100

table(df$is_pregnant)
df[df == "Inf"] <- NA # "Inf" means missing
# Target coding: 0 = not pregnant, 1 = pregnant; men are always 0.
df$is_pregnant <- to_binary(df$is_pregnant)
df$is_pregnant <- ifelse(df$sex == 1, 0, df$is_pregnant)
table(df$is_pregnant, df$sex)

# is_urban is all NA for this survey, so the urban = 1 / rural = 0 recode
# would leave it unchanged and is omitted. Once a real variable is mapped
# above, add: df$is_urban <- to_binary(df$is_urban)
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 sentinel codes become NA.
df$waist  <- drop_outside(df$waist, 888)
df$weight <- drop_outside(df$weight, 888)
# Height is already in metres, so the sentinels are now 8.88 / 9.99; a 5 m
# cut-off removes them together with any other impossible height.
df$height <- drop_outside(df$height, 5)

## BP
table(df$bp_measured)
table(df$self_hyper)
df$self_hyper  <- to_binary(df$self_hyper)
df$bp_measured <- to_binary(df$bp_measured)
# Never measured -> cannot have been told they have hypertension.
df$self_hyper <- ifelse(df$bp_measured == 0, 0, df$self_hyper)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df$drug_hyper <- to_binary(df$drug_hyper)
# No reported diagnosis -> no treatment.
df$drug_hyper <- ifelse(df$self_hyper == 0, 0, df$drug_hyper)

# Blood pressure readings: sentinel codes (>= 888) become NA.
bp_cols <- c("sbp1", "sbp2", "sbp3", "dbp1", "dbp2", "dbp3")
df[bp_cols] <- lapply(df[bp_cols], function(x) ifelse(x >= 888, NA, x))

## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
df$self_diabetes    <- to_binary(df$self_diabetes)
df$glucose_measured <- to_binary(df$glucose_measured)
df$self_diabetes <- ifelse(df$glucose_measured == 0, 0, df$self_diabetes)
table(df$drug_diabetes, useNA = c("always"))
df$drug_diabetes <- to_binary(df$drug_diabetes)
df$drug_diabetes <- ifelse(df$self_diabetes == 0, 0, df$drug_diabetes)

### FINAL INSPECTION
# View() needs an interactive session; skip it so a batch run still saves.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify that no abnormal values are left

### SAVE FINAL DATASET
# The result has 32 columns and the same number of rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name: country full name and year, for example "Afghanistan_2018.csv".
write.csv(df, "Botswana_2014.csv", row.names = FALSE)

rm(data, df, to_binary, drop_outside, bp_cols)




### BRITISH VIRGIN ISLANDS

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("BVI2009.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "BVI_2009_STEPS_v01",
  country   = "British Virgin Islands",
  region    = "Americas",   # WHO region
  data_year = 2009,         # year of data collection (latest year if the survey spans two)
  coverage  = "National",   # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = NA,             # no step-3 weight is extracted for this survey

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = NA,      # no urban/rural variable is extracted for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,              # kg
  height = m3,              # cm here; converted to metres in the recode step
  waist  = m7,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # target coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary flags: 1 stays 1, every other non-missing answer becomes 0.
# Participants whose blood pressure was never measured count as not diagnosed.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or more are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: binary flags, and no diagnosis when
# glucose was never measured.
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens a GUI data viewer, so it is only called in an interactive
# session; a batch run (e.g. Rscript) goes straight to the summary and the save.
if (interactive()) {
  View(df)   # check your changes
}
summary(df)   # look for abnormal values that should still be converted to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of observations should be
# the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # output folder; modify according to your PC
write.csv(df, "British_virgin_islands_2009.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)




### Cabo Verde

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("cpv2007.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "CPV_2007_STEPS_v01",
  country   = "Cabo Verde",
  region    = "Africa",     # WHO region
  data_year = 2007,         # year of data collection (latest year if the survey spans two)
  coverage  = "National",   # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  # NOTE(review): confirm 18-69 against the survey report for this year.
  survey_min_age = 18,
  survey_max_age = 69,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = NA,      # no urban/rural variable is extracted for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,              # kg
  height = m3,              # cm here; converted to metres in the recode step
  waist  = m7,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2,
  drug_hyper  = h3a,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # target coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary flags: 1 stays 1, every other non-missing answer becomes 0.
# Participants whose blood pressure was never measured count as not diagnosed.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or more are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: binary flags, and no diagnosis when
# glucose was never measured.
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens a GUI data viewer, so it is only called in an interactive
# session; a batch run (e.g. Rscript) goes straight to the summary and the save.
if (interactive()) {
  View(df)   # check your changes
}
summary(df)   # look for abnormal values that should still be converted to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of observations should be
# the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # output folder; modify according to your PC
write.csv(df, "Cabo_verde_2007.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)





### Cambodia

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("khm2010.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "KHM_2010_STEPS_v01",
  country   = "Cambodia",
  region    = "Western Pacific",   # WHO region
  data_year = 2010,                # year of data collection (latest year if the survey spans two)
  coverage  = "National",          # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,            # years
  is_urban       = stratum,        # taken from the stratum variable; recoded to 1/0 in the recode step
  is_pregnant    = m5,

  # anthropometry
  weight = m4,                     # kg
  height = m3,                     # cm here; converted to metres in the recode step
  waist  = m7,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# Urban flag: a value of 1 is kept as 1 (urban), any other non-missing value
# becomes 0 (rural).
table(df$is_urban)
df <- df %>%
  mutate(is_urban = ifelse(is_urban == 1, 1, 0))
table(df$is_urban)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary flags: 1 stays 1, every other non-missing answer becomes 0.
# Participants whose blood pressure was never measured count as not diagnosed.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or more are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: binary flags, and no diagnosis when
# glucose was never measured.
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens a GUI data viewer, so it is only called in an interactive
# session; a batch run (e.g. Rscript) goes straight to the summary and the save.
if (interactive()) {
  View(df)   # check your changes
}
summary(df)   # look for abnormal values that should still be converted to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of observations should be
# the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # output folder; modify according to your PC
write.csv(df, "Cambodia_2010.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)







### Cayman Islands

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("CYM2012.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "CYM_2012_STEPS_v01",
  country   = "Cayman Islands",
  region    = "Americas",   # WHO region
  data_year = 2012,         # year of data collection (latest year if the survey spans two)
  coverage  = "National",   # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = NA,             # no step-3 weight is extracted for this survey

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = NA,      # no urban/rural variable is extracted for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,              # kg
  height = m3,              # cm here; converted to metres in the recode step
  waist  = m7,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # target coding: urban = 1, rural = 0
# The urban/rural recode is left disabled for this survey:
# df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))
# table(df$is_urban)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary flags: 1 stays 1, every other non-missing answer becomes 0.
# Participants whose blood pressure was never measured count as not diagnosed.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or more are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: binary flags, and no diagnosis when
# glucose was never measured.
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens a GUI data viewer, so it is only called in an interactive
# session; a batch run (e.g. Rscript) goes straight to the summary and the save.
if (interactive()) {
  View(df)   # check your changes
}
summary(df)   # look for abnormal values that should still be converted to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of observations should be
# the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # output folder; modify according to your PC
write.csv(df, "Cayman_islands_2012.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)


### Comoros

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("com2011.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "COM_2011_STEPS_v01",
  country   = "Comoros",
  region    = "Africa",     # WHO region
  data_year = 2011,         # year of data collection (latest year if the survey spans two)
  coverage  = "National",   # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = urb_rur, # recoded to urban = 1, other = 0 in the recode step
  is_pregnant    = m5,

  # anthropometry
  weight = m4,              # kg
  height = m3,              # cm here; converted to metres in the recode step
  waist  = m7,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# Urban flag: the label "Urbain" becomes 1 (urban), any other non-missing
# label becomes 0 (rural).
table(df$is_urban)
df <- df %>%
  mutate(is_urban = ifelse(is_urban == "Urbain", 1, 0))
table(df$is_urban)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary flags: 1 stays 1, every other non-missing answer becomes 0.
# Participants whose blood pressure was never measured count as not diagnosed.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or more are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: binary flags, and no diagnosis when
# glucose was never measured.
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens a GUI data viewer, so it is only called in an interactive
# session; a batch run (e.g. Rscript) goes straight to the summary and the save.
if (interactive()) {
  View(df)   # check your changes
}
summary(df)   # look for abnormal values that should still be converted to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of observations should be
# the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # output folder; modify according to your PC
write.csv(df, "Comoros_2011.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)


### Cook Islands

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("cok2013.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "COK_2013_STEPS_v01",
  country   = "Cook Islands",
  region    = "Western Pacific",   # WHO region
  # year of data collection (latest year if the survey spans two)
  # NOTE(review): the study id and file name say 2013 -- presumably the survey
  # ran 2013-2015; confirm.
  data_year = 2015,
  coverage  = "National",          # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,            # years
  is_urban       = NA,             # no urban/rural variable is extracted for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,                     # kg
  height = m3,                     # cm here; converted to metres in the recode step
  waist  = m7,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # target coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary flags: 1 stays 1, every other non-missing answer becomes 0.
# Participants whose blood pressure was never measured count as not diagnosed.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or more are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: binary flags, and no diagnosis when
# glucose was never measured.
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Treatment flag, forced to 0 when there is no self-reported diagnosis.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens a GUI data viewer, so it is only called in an interactive
# session; a batch run (e.g. Rscript) goes straight to the summary and the save.
if (interactive()) {
  View(df)   # check your changes
}
summary(df)   # look for abnormal values that should still be converted to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of observations should be
# the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # output folder; modify according to your PC
write.csv(df, "Cook_islands_2015.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)




### Ecuador

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("ecu2018.csv")   # raw STEPS file for this survey

### EXTRACT DATA
# Assemble the harmonised data frame. Columns are looked up with with(data, ...)
# instead of attach()/detach(): nothing is left on the search path if the call
# fails, and a same-named object in the global environment cannot take
# precedence over a survey column.
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "ECU_2018_STEPS_v01",
  country   = "Ecuador",
  region    = "Americas",   # WHO region
  data_year = 2018,         # year of data collection (latest year if the survey spans two)
  coverage  = "National",   # "National" or "Subnational"

  # sampling design
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 69,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = NA,      # no urban/rural variable is extracted for this survey
  is_pregnant    = m8,

  # anthropometry
  weight = m12,             # kg
  height = m11,             # cm here; converted to metres in the recode step
  waist  = m14,

  # blood pressure readings (three systolic, three diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # hypertension history
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3,

  # diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is coded men = 1, women = 2 (any non-missing label other than "Men"
# becomes 2); height is extracted in cm and converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA   # "Inf" stands for missing, in every column
# Pregnancy flag: 1 = pregnant, 0 = any other non-missing answer; men are set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # target coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible anthropometric values to NA: non-positive readings and the
# 888/999 codes. Height is already in metres, so 5 and above is dropped, which
# also covers the 8.8 and 999 cut-offs.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "Ecuador_2018.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean



### Eritrea

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("eri2010.csv")  # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` first and only for this call,
# so a same-named workspace object cannot mask a column and nothing is left on
# the search path if data.frame() stops with an error (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "ERI_2010_STEPS_v01",
  country   = "Eritrea",
  region    = "Africa",      # WHO region
  data_year = 2010,          # year of data collection (latest if it spans two)
  coverage  = "National",    # "National" or "Subnational"

  # sampling design and weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 74,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,      # years
  is_urban       = NA,       # no urban/rural variable is mapped for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,               # kg
  height = m3,               # cm (converted to m below)
  waist  = m7,

  # blood pressure readings and history
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # glucose / diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE 

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1, every other non-missing value becomes 2.
# NOTE(review): assumes sex is stored as the labels "Men"/"Women" -- confirm
# with table(df$sex) before trusting the recode.
# Height: divided by 100 (cm -> m; verify the source unit first).
df <- df %>% mutate(
  sex    = ifelse(sex == "Men", 1, 2),
  height = height / 100
)

table(df$is_pregnant)  # raw codes before recoding
# Any cell equal to "Inf" is treated as missing, across the whole data frame.
df[df == "Inf"] <- NA
# Pregnancy: 1 = pregnant, any other non-missing code = 0; men are forced to 0.
# The second expression sees the result of the first.
df <- df %>% mutate(
  is_pregnant = ifelse(is_pregnant == 1, 1, 0),
  is_pregnant = ifelse(sex == 1, 0, is_pregnant)
)
table(df$is_pregnant, df$sex)

table(df$is_urban)  # recode to urban = 1, rural = 0 if necessary




### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal anthropometric values to NA in one pass:
#   waist, weight: non-positive values and anything >= 888 (this bound also
#                  covers the 999 code)
#   height:        non-positive values and anything >= 5 (height was divided by
#                  100 above, so this bound also covers 8.8 and larger values)
df <- df %>% mutate(
  waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
  height = ifelse(height <= 0 | height >= 5, NA, height),
  weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
)



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "Eritrea_2010.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean



### Eswatini

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("swz2014.csv")  # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` first and only for this call,
# so a same-named workspace object cannot mask a column and nothing is left on
# the search path if data.frame() stops with an error (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "SWZ_2014_STEPS_v01",
  country   = "Eswatini",
  region    = "Africa",      # WHO region
  data_year = 2014,          # year of data collection (latest if it spans two)
  coverage  = "National",    # "National" or "Subnational"

  # sampling design and weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 15,
  survey_max_age = 69,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,      # years
  is_urban       = NA,       # no urban/rural variable is mapped for this survey
  is_pregnant    = m8,

  # anthropometry
  weight = m12,              # kg
  height = m11,              # cm (converted to m below)
  waist  = m14,

  # blood pressure readings and history
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3,

  # glucose / diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE 

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1, every other non-missing value becomes 2.
# NOTE(review): assumes sex is stored as the labels "Men"/"Women" -- confirm
# with table(df$sex) before trusting the recode.
# Height: divided by 100 (cm -> m; verify the source unit first).
df <- df %>% mutate(
  sex    = ifelse(sex == "Men", 1, 2),
  height = height / 100
)

table(df$is_pregnant)  # raw codes before recoding
# Any cell equal to "Inf" is treated as missing, across the whole data frame.
df[df == "Inf"] <- NA
# Pregnancy: 1 = pregnant, any other non-missing code = 0; men are forced to 0.
# The second expression sees the result of the first.
df <- df %>% mutate(
  is_pregnant = ifelse(is_pregnant == 1, 1, 0),
  is_pregnant = ifelse(sex == 1, 0, is_pregnant)
)
table(df$is_pregnant, df$sex)

table(df$is_urban)  # recode to urban = 1, rural = 0 if necessary




### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal anthropometric values to NA in one pass:
#   waist, weight: non-positive values and anything >= 888 (this bound also
#                  covers the 999 code)
#   height:        non-positive values and anything >= 5 (height was divided by
#                  100 above, so this bound also covers 8.8 and larger values)
df <- df %>% mutate(
  waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
  height = ifelse(height <= 0 | height >= 5, NA, height),
  weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
)



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "Eswatini_2014.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean



### Ethiopia

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("eth2015.csv")  # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` first and only for this call,
# so a same-named workspace object cannot mask a column and nothing is left on
# the search path if data.frame() stops with an error (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "ETH_2015_STEPS_v01",
  country   = "Ethiopia",
  region    = "Africa",      # WHO region
  data_year = 2015,          # year of data collection (latest if it spans two)
  coverage  = "National",    # "National" or "Subnational"

  # sampling design and weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 15,
  survey_max_age = 69,

  # demographics (this file uses c1/c3 for sex and age)
  participant_id = pid,
  sex            = c1,
  age            = c3,          # years
  is_urban       = urbanrural,  # recoded to urban = 1, rural = 0 below
  is_pregnant    = m8,

  # anthropometry
  weight = m12,              # kg
  height = m11,              # cm (converted to m below)
  waist  = m14,

  # blood pressure readings and history
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3,

  # glucose / diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE 

## DEMOGRAPHIC VARIABLES
# The label-to-code sex recode is disabled here, so sex is kept exactly as
# extracted.
# NOTE(review): presumably sex is already coded 1 = men, 2 = women -- verify.
#df <- df %>% mutate(sex = ifelse(sex  == "Men", 1, 2))

# Height: divided by 100 (cm -> m; verify the source unit first).
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)  # raw codes before recoding
# Any cell equal to "Inf" is treated as missing, across the whole data frame.
df[df == "Inf"] <- NA
# Pregnancy: 1 = pregnant, any other non-missing code = 0; men are forced to 0.
# The second expression sees the result of the first.
df <- df %>% mutate(
  is_pregnant = ifelse(is_pregnant == 1, 1, 0),
  is_pregnant = ifelse(sex == 1, 0, is_pregnant)
)
table(df$is_pregnant, df$sex)

# Urban/rural: code 1 stays 1, every other non-missing code becomes 0.
table(df$is_urban)  # raw codes
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))
table(df$is_urban)  # after recoding



### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal anthropometric values to NA in one pass:
#   waist, weight: non-positive values and anything >= 888 (this bound also
#                  covers the 999 code)
#   height:        non-positive values and anything >= 5 (height was divided by
#                  100 above, so this bound also covers 8.8 and larger values)
df <- df %>% mutate(
  waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
  height = ifelse(height <= 0 | height >= 5, NA, height),
  weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
)



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "Ethiopia_2015.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean




### Fiji

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("FJI2011.csv")  # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` first and only for this call,
# so a same-named workspace object cannot mask a column and nothing is left on
# the search path if data.frame() stops with an error (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "FJI_2011_STEPS_v01",
  country   = "Fiji",
  region    = "Western Pacific",  # WHO region
  data_year = 2011,          # year of data collection (latest if it spans two)
  coverage  = "National",    # "National" or "Subnational"

  # sampling design and weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,      # years
  is_urban       = NA,       # no urban/rural variable is mapped for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,               # kg
  height = m3,               # cm (converted to m below)
  waist  = m7,

  # blood pressure readings and history
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # glucose / diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE 

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1, every other non-missing value becomes 2.
# NOTE(review): assumes sex is stored as the labels "Men"/"Women" -- confirm
# with table(df$sex) before trusting the recode.
# Height: divided by 100 (cm -> m; verify the source unit first).
df <- df %>% mutate(
  sex    = ifelse(sex == "Men", 1, 2),
  height = height / 100
)

table(df$is_pregnant)  # raw codes before recoding
# Any cell equal to "Inf" is treated as missing, across the whole data frame.
df[df == "Inf"] <- NA
# Pregnancy: 1 = pregnant, any other non-missing code = 0; men are forced to 0.
# The second expression sees the result of the first.
df <- df %>% mutate(
  is_pregnant = ifelse(is_pregnant == 1, 1, 0),
  is_pregnant = ifelse(sex == 1, 0, is_pregnant)
)
table(df$is_pregnant, df$sex)

table(df$is_urban)  # recode to urban = 1, rural = 0 if necessary




### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal anthropometric values to NA in one pass:
#   waist, weight: non-positive values and anything >= 888 (this bound also
#                  covers the 999 code)
#   height:        non-positive values and anything >= 5 (height was divided by
#                  100 above, so this bound also covers 8.8 and larger values)
df <- df %>% mutate(
  waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
  height = ifelse(height <= 0 | height >= 5, NA, height),
  weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
)



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "Fiji_2011.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean




### French Polynesia

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("PYF2010.csv")  # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` first and only for this call,
# so a same-named workspace object cannot mask a column and nothing is left on
# the search path if data.frame() stops with an error (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "PYF_2010_STEPS_v01",
  country   = "French Polynesia",
  region    = "Western Pacific",  # WHO region
  data_year = 2010,          # year of data collection (latest if it spans two)
  coverage  = "National",    # "National" or "Subnational"

  # sampling design and weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,      # years
  is_urban       = NA,       # no urban/rural variable is mapped for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,               # kg
  height = m3,               # cm (converted to m below)
  waist  = m7,

  # blood pressure readings and history
  sbp1 = NA,                 # no first reading is mapped for this survey
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = NA,                 # no first reading is mapped for this survey
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # glucose / diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE 

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1, every other non-missing value becomes 2.
# NOTE(review): assumes sex is stored as the labels "Men"/"Women" -- confirm
# with table(df$sex) before trusting the recode.
# Height: divided by 100 (cm -> m; verify the source unit first).
df <- df %>% mutate(
  sex    = ifelse(sex == "Men", 1, 2),
  height = height / 100
)

table(df$is_pregnant)  # raw codes before recoding
# Any cell equal to "Inf" is treated as missing, across the whole data frame.
df[df == "Inf"] <- NA
# Pregnancy: 1 = pregnant, any other non-missing code = 0; men are forced to 0.
# The second expression sees the result of the first.
df <- df %>% mutate(
  is_pregnant = ifelse(is_pregnant == 1, 1, 0),
  is_pregnant = ifelse(sex == 1, 0, is_pregnant)
)
table(df$is_pregnant, df$sex)

table(df$is_urban)  # recode to urban = 1, rural = 0 if necessary




### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal anthropometric values to NA in one pass:
#   waist, weight: non-positive values and anything >= 888 (this bound also
#                  covers the 999 code)
#   height:        non-positive values and anything >= 5 (height was divided by
#                  100 above, so this bound also covers 8.8 and larger values)
df <- df %>% mutate(
  waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
  height = ifelse(height <= 0 | height >= 5, NA, height),
  weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
)



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "French_polynesia_2010.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean



### Gambia

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("GMB2010.csv")  # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` first and only for this call,
# so a same-named workspace object cannot mask a column and nothing is left on
# the search path if data.frame() stops with an error (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  # survey-level constants (recycled to every row)
  study_id  = "GMB_2010_STEPS_v01",
  country   = "Gambia",
  region    = "Africa",      # WHO region
  data_year = 2010,          # year of data collection (latest if it spans two)
  coverage  = "National",    # "National" or "Subnational"

  # sampling design and weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = NA,              # no step 3 weight is mapped for this survey

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # demographics
  participant_id = pid,
  sex            = sex,
  age            = age,      # years
  is_urban       = NA,       # no urban/rural variable is mapped for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4,               # kg
  height = m3,               # cm (converted to m below)
  waist  = m7,

  # blood pressure readings and history
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured = h1,
  self_hyper  = h2a,
  drug_hyper  = h3a,

  # glucose / diabetes history
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE 

## DEMOGRAPHIC VARIABLES
table(df$sex)  # raw sex values before recoding
# Sex: the label "Men" becomes 1, every other non-missing value becomes 2.
# NOTE(review): assumes sex is stored as the labels "Men"/"Women" -- check the
# table printed above before trusting the recode.
# Height: divided by 100 (cm -> m; verify the source unit first).
df <- df %>% mutate(
  sex    = ifelse(sex == "Men", 1, 2),
  height = height / 100
)

table(df$is_pregnant)  # raw codes before recoding
# Any cell equal to "Inf" is treated as missing, across the whole data frame.
df[df == "Inf"] <- NA
# Pregnancy: 1 = pregnant, any other non-missing code = 0; men are forced to 0.
# The second expression sees the result of the first.
df <- df %>% mutate(
  is_pregnant = ifelse(is_pregnant == 1, 1, 0),
  is_pregnant = ifelse(sex == 1, 0, is_pregnant)
)
table(df$is_pregnant, df$sex)

table(df$is_urban)  # recode to urban = 1, rural = 0 if necessary




### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal anthropometric values to NA in one pass:
#   waist, weight: non-positive values and anything >= 888 (this bound also
#                  covers the 999 code)
#   height:        non-positive values and anything >= 5 (height was divided by
#                  100 above, so this bound also covers 8.8 and larger values)
df <- df %>% mutate(
  waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
  height = ifelse(height <= 0 | height >= 5, NA, height),
  weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
)



##BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes / 0 = any other non-missing code, then set
# self_hyper to 0 where blood pressure was never measured.
# NOTE(review): a missing bp_measured turns self_hyper into NA, because
# ifelse() propagates NA from its test -- confirm this is intended.
df <- df %>% mutate(
  bp_measured = ifelse(bp_measured == 1, 1, 0),
  self_hyper  = ifelse(self_hyper == 1, 1, 0),
  self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
)
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is set to 0 where self_hyper is 0.
# Blood-pressure readings >= 888 (e.g. the 888/999 codes) become NA.
df <- df %>% mutate(
  drug_hyper = ifelse(drug_hyper == 1, 1, 0),
  drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper),
  sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
  sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
  sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
  dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
  dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
  dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
)


## GLUCOSE
# Raw distributions, printed before any diabetes variable is recoded.
table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
table(df$drug_diabetes, useNA = c("always"))
# Same gating logic as for blood pressure: measured -> diagnosed -> treated.
df <- df %>% mutate(
  glucose_measured = ifelse(glucose_measured == 1, 1, 0),
  self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
  self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes),
  drug_diabetes    = ifelse(drug_diabetes == 1, 1, 0),
  drug_diabetes    = ifelse(self_diabetes == 0, 0, drug_diabetes)
)


### FINAL INSPECTION
# View() opens a data viewer and may fail when the script runs without an
# interactive session, which would stop the run before the CSV is written.
if (interactive()) {
  View(df)  # check your changes
}
summary(df)   # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction yields a data frame with 32 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # modify according to your PC
write.csv(df, "Gambia_2010.csv", row.names = FALSE)   # file name: country full name and year

rm(data, df)  # clear both objects so the next survey starts clean



### Grenada

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("GRD2010.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "GRD_2010_STEPS_v01", # STEPS study id
  country = "Grenada", # full country name
  region = "Americas", # WHO region
  data_year = 2011, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = NA, # set to NA for this survey

  survey_min_age = 25, # youngest target age
  survey_max_age = 64, # oldest target age

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = NA, # no urban/rural variable is mapped for this survey
  is_pregnant = m5,

  weight = m4, # expected in kg
  height = m3, # expected in cm; converted to m during recoding
  waist = m7,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3a,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "Men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



## BLOOD PRESSURE
# Inspect the raw codings before they are collapsed to 0/1.
table(df$bp_measured)
table(df$self_hyper)

# Code 1 stays 1 and every other non-missing code becomes 0. A reported
# diagnosis is then forced to 0 wherever bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 collapse for treatment, forced to 0 when self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as missing-value codes.
# NOTE(review): unlike the anthropometric recode, zero or negative readings
# are not blanked here.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same pattern as blood pressure: collapse to 0/1, then force the reported
# diagnosis to 0 wherever glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )

table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # eyeball the recoded data
summary(df) # look for abnormal values that still need converting to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows should match `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Grenada_2011.csv", row.names = FALSE)

rm(list = c("data", "df"))



### Guyana

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("guy2016.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "GUY_2016_STEPS_v01", # STEPS study id
  country = "Guyana", # full country name
  region = "Americas", # WHO region
  data_year = 2016, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest target age
  survey_max_age = 69, # oldest target age

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = NA, # no urban/rural variable is mapped for this survey
  is_pregnant = m8,

  weight = m12, # expected in kg
  height = m11, # expected in cm; converted to m during recoding
  # NOTE(review): the other sections that use this m4a..m12 layout map waist
  # to m14; confirm that m13 is the waist measurement in guy2016.csv.
  waist = m13,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "Men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



## BLOOD PRESSURE
# Inspect the raw codings before they are collapsed to 0/1.
table(df$bp_measured)
table(df$self_hyper)

# Code 1 stays 1 and every other non-missing code becomes 0. A reported
# diagnosis is then forced to 0 wherever bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 collapse for treatment, forced to 0 when self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as missing-value codes.
# NOTE(review): unlike the anthropometric recode, zero or negative readings
# are not blanked here.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same pattern as blood pressure: collapse to 0/1, then force the reported
# diagnosis to 0 wherever glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )

table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # eyeball the recoded data
summary(df) # look for abnormal values that still need converting to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows should match `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Guyana_2016.csv", row.names = FALSE)

rm(list = c("data", "df"))




### Iraq

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("irq2015.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "IRQ_2015_STEPS_v01", # STEPS study id
  country = "Iraq", # full country name
  region = "Eastern Mediterranean", # WHO region
  data_year = 2015, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest target age
  survey_max_age = NA, # oldest target age is left as NA for this survey

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = NA, # no urban/rural variable is mapped for this survey
  is_pregnant = m8,

  weight = m12, # expected in kg
  height = m11, # expected in cm; converted to m during recoding
  waist = m14,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "Men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



## BLOOD PRESSURE
# Inspect the raw codings before they are collapsed to 0/1.
table(df$bp_measured)
table(df$self_hyper)

# Code 1 stays 1 and every other non-missing code becomes 0. A reported
# diagnosis is then forced to 0 wherever bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 collapse for treatment, forced to 0 when self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as missing-value codes.
# NOTE(review): unlike the anthropometric recode, zero or negative readings
# are not blanked here.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same pattern as blood pressure: collapse to 0/1, then force the reported
# diagnosis to 0 wherever glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )

table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # eyeball the recoded data
summary(df) # look for abnormal values that still need converting to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows should match `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Iraq_2015.csv", row.names = FALSE)

rm(list = c("data", "df"))




###Kenya

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("ken2015.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "KEN_2015_STEPS_v01", # STEPS study id
  country = "Kenya", # full country name
  region = "Africa", # WHO region
  data_year = 2015, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest target age
  survey_max_age = 69, # oldest target age

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = urbrur, # raw urban/rural variable; recoded to 1/0 later
  is_pregnant = m8,

  weight = m12, # expected in kg
  height = m11, # expected in cm; converted to m during recoding
  waist = m14,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "Men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding is urban = 1, rural = 0
# Code 1 is kept as 1; every other non-missing code becomes 0.
df$is_urban <- ifelse(df$is_urban == 1, 1, 0)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



## BLOOD PRESSURE
# Inspect the raw codings before they are collapsed to 0/1.
table(df$bp_measured)
table(df$self_hyper)

# Code 1 stays 1 and every other non-missing code becomes 0. A reported
# diagnosis is then forced to 0 wherever bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 collapse for treatment, forced to 0 when self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as missing-value codes.
# NOTE(review): unlike the anthropometric recode, zero or negative readings
# are not blanked here.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same pattern as blood pressure: collapse to 0/1, then force the reported
# diagnosis to 0 wherever glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )

table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # eyeball the recoded data
summary(df) # look for abnormal values that still need converting to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows should match `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Kenya_2015.csv", row.names = FALSE)

rm(list = c("data", "df"))



### Kiribati

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("kir2015.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "KIR_2015_STEPS_v01", # STEPS study id
  country = "Kiribati", # full country name
  region = "Western Pacific", # WHO region
  data_year = 2016, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest target age
  survey_max_age = 69, # oldest target age

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = NA, # no urban/rural variable is mapped for this survey
  is_pregnant = m8,

  weight = m12, # expected in kg
  height = m11, # expected in cm; converted to m during recoding
  waist = m14,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "Men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



## BLOOD PRESSURE
# Inspect the raw codings before they are collapsed to 0/1.
table(df$bp_measured)
table(df$self_hyper)

# Code 1 stays 1 and every other non-missing code becomes 0. A reported
# diagnosis is then forced to 0 wherever bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 collapse for treatment, forced to 0 when self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as missing-value codes.
# NOTE(review): unlike the anthropometric recode, zero or negative readings
# are not blanked here.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same pattern as blood pressure: collapse to 0/1, then force the reported
# diagnosis to 0 wherever glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )

table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # eyeball the recoded data
summary(df) # look for abnormal values that still need converting to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows should match `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Kiribati_2016.csv", row.names = FALSE)

rm(list = c("data", "df"))


### Kuwait

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("kwt2014.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "KWT_2014_STEPS_v01", # STEPS study id
  country = "Kuwait", # full country name
  region = "Eastern Mediterranean", # WHO region
  data_year = 2014, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest target age
  survey_max_age = 69, # oldest target age

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = NA, # no urban/rural variable is mapped for this survey
  is_pregnant = m8,

  weight = m12, # expected in kg
  height = m11, # expected in cm; converted to m during recoding
  waist = m14,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "Men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



## BLOOD PRESSURE
# Inspect the raw codings before they are collapsed to 0/1.
table(df$bp_measured)
table(df$self_hyper)

# Code 1 stays 1 and every other non-missing code becomes 0. A reported
# diagnosis is then forced to 0 wherever bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 collapse for treatment, forced to 0 when self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as missing-value codes.
# NOTE(review): unlike the anthropometric recode, zero or negative readings
# are not blanked here.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same pattern as blood pressure: collapse to 0/1, then force the reported
# diagnosis to 0 wherever glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )

table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # eyeball the recoded data
summary(df) # look for abnormal values that still need converting to NA


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows should match `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Kuwait_2014.csv", row.names = FALSE)

rm(list = c("data", "df"))



### Kyrgyzstan

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("kgz2013.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised data frame for this survey. with() resolves the bare
# column names inside `data` for this one call only: nothing is put on the
# search path (so a failure cannot leave a dataset attached for the next
# section) and same-named objects in the workspace cannot mask a column, both
# of which can happen with attach()/detach().
df <- with(data, data.frame(
  study_id = "KGZ_2013_STEPS_v01", # STEPS study id
  country = "Kyrgyzstan", # full country name
  # Kyrgyzstan belongs to the WHO European Region, not the Eastern
  # Mediterranean one. NOTE(review): the label "Europe" is assumed to match
  # the short style of the other region labels in this script -- confirm.
  region = "Europe",
  data_year = 2013, # year of data collection (latest year if it spans two)
  coverage = "National", # "National" or "Subnational"

  # Survey design variables
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 25, # youngest target age
  survey_max_age = 64, # oldest target age

  participant_id = pid,
  sex = sex,
  age = age,
  is_urban = stratum, # stratum is used as the urban/rural source; recoded later
  is_pregnant = m8,

  weight = m12, # expected in kg
  height = m11, # expected in cm; converted to m during recoding
  waist = m14,

  # Three blood-pressure readings (systolic / diastolic)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # History of hypertension and diabetes
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2. Only the label "men" maps to 1; every
# other non-missing value becomes 2, so verify the raw labels first.
# NOTE(review): this section matches lowercase "men" while the other sections
# match "Men" -- confirm against the labels in kgz2013.csv.
df$sex <- ifelse(df$sex == "men", 1, 2)

# Height arrives in cm; convert to metres (verify the units first).
df <- df %>%
  mutate(height = height / 100)

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf is treated as missing, in every column
# Code 1 stays 1, other non-missing codes become 0, and men (sex == 1) are
# always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding is urban = 1, rural = 0
# Code 1 is kept as 1; every other non-missing code becomes 0.
df$is_urban <- ifelse(df$is_urban == 1, 1, 0)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, or the 888/999 missing codes) to NA.
# ">= 888" already includes 999.
df <- df %>%
  mutate(waist = ifelse(waist <= 0 | waist >= 888, NA, waist))

# Height is in metres at this point; ">= 5" also covers the 8.8 and 999 checks.
df <- df %>%
  mutate(height = ifelse(height <= 0 | height >= 5, NA, height))

df <- df %>%
  mutate(weight = ifelse(weight <= 0 | weight >= 888, NA, weight))



##BP 
table(df$bp_measured)
table(df$self_hyper)
df <- df %>% mutate(self_hyper = ifelse(self_hyper  == 1, 1, 0))
df <- df %>% mutate(bp_measured = ifelse(bp_measured  == 1, 1, 0))

df <- df %>% mutate(self_hyper = ifelse(bp_measured  == 0, 0, self_hyper))
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df <- df %>% mutate(drug_hyper = ifelse(drug_hyper  == 1, 1, 0))
df <- df %>% mutate(drug_hyper = ifelse(self_hyper  == 0, 0, drug_hyper))

df <- df %>% mutate(sbp1 = ifelse(sbp1 >= 888, NA, sbp1))
df <- df %>% mutate(sbp2 = ifelse(sbp2 >= 888, NA, sbp2))
df <- df %>% mutate(sbp3 = ifelse(sbp3 >= 888, NA, sbp3))
df <- df %>% mutate(dbp1 = ifelse(dbp1 >= 888, NA, dbp1))
df <- df %>% mutate(dbp2 = ifelse(dbp2 >= 888, NA, dbp2))
df <- df %>% mutate(dbp3 = ifelse(dbp3 >= 888, NA, dbp3))


table(df$glucose_measured, df$self_diabetes, useNA = c("always"))
df <- df %>% mutate(self_diabetes = ifelse(self_diabetes  == 1, 1, 0))
df <- df %>% mutate(glucose_measured = ifelse(glucose_measured  == 1, 1, 0))

df <- df %>% mutate(self_diabetes = ifelse(glucose_measured  == 0, 0, self_diabetes))
table(df$drug_diabetes, useNA = c("always"))
df <- df %>% mutate(drug_diabetes = ifelse(drug_diabetes  == 1, 1, 0))
df <- df %>% mutate(drug_diabetes = ifelse(self_diabetes  == 0, 0, drug_diabetes))


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df)
summary(df)


### SAVE FINAL DATASET
# The number of observations should be the same as in `data`.
# NOTE(review): the template comment expected 22 variables, but the
# data.frame() call in this section lists more columns than that - confirm
# the expected count.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, file = "Kyrgyzstan_2013.csv", row.names = FALSE)

# Drop both objects so nothing carries over into the next country section.
rm(list = c("data", "df"))



### Lao People's Democratic Republic

# Read the raw STEPS export for this survey.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("lao2013.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised extract with with() instead of attach()/detach():
# column names are resolved inside `data` for this one call only, so a failed
# call cannot leave `data` on the search path and a same-named global object
# cannot shadow a survey column.
df <- with(data, data.frame(
  study_id  = "LAO_2013_STEPS_v01", # STEPS id
  country   = "Lao People's Democratic Republic", # full country name
  region    = "Western Pacific", # full WHO region name
  data_year = 2013, # year of data collection (latest year if it spans two)
  coverage  = "National", # "National" or "Subnational"

  # Survey design variables (capitalised in this file).
  psu     = PSU,
  stratum = Stratum,
  wstep1  = Wstep1,
  wstep2  = Wstep2,
  wstep3  = Wstep3,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = Stratum, # later recoded to urban = 1, rural = 0
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm
  waist  = m7,

  # Three blood pressure readings (systolic / diastolic).
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history items.
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1 and every other non-missing value becomes 2
# (always verify that men = 1 and women = 2 against the raw labels).
# Height is converted from cm to m in the same step.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

# Raw pregnancy codes, shown before recoding.
table(df$is_pregnant)
df[df == "Inf"] <- NA # inf means missing
# Pregnancy: 1 stays 1, any other non-missing code becomes 0; rows with
# sex == 1 are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# Urban/rural: 1 stays 1, any other non-missing code becomes 0.
table(df$is_urban) # recode urban = 1, rural = 0 if necessary
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible measurements to NA: non-positive values and anything at or
# above the upper cut-off. The cut-offs also cover the "888"/"999" codes
# (8.88 / 9.99 once height has been divided by 100).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recodes: 1 stays 1, other non-missing codes become 0.
# self_hyper is forced to 0 wherever the recoded bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(
      self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)
    )
  )

# Blood pressure readings of 888 or more are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


# Diabetes items follow the same pattern as the hypertension items.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df)
summary(df)


### SAVE FINAL DATASET
# The extract built in this section has 32 columns (the template comment said
# 22); the number of observations should be the same as in `data`.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(
  df,
  file = "Lao_People's_Democratic_Republic_2013.csv",
  row.names = FALSE
)

# Drop both objects so nothing carries over into the next country section.
rm(list = c("data", "df"))



### Lesotho

# Read the raw STEPS export for this survey.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("lso2012.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised extract with with() instead of attach()/detach():
# column names are resolved inside `data` for this one call only, so a failed
# call cannot leave `data` on the search path and a same-named global object
# cannot shadow a survey column.
df <- with(data, data.frame(
  study_id  = "LSO_2012_STEPS_v01", # STEPS id
  country   = "Lesotho", # full country name
  region    = "Africa", # full WHO region name
  data_year = 2012, # year of data collection (latest year if it spans two)
  coverage  = "National", # "National" or "Subnational"

  # Survey design variables.
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable is extracted here
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm
  waist  = m7,

  # Three blood pressure readings (systolic / diastolic).
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history items.
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1 and every other non-missing value becomes 2
# (always verify that men = 1 and women = 2 against the raw labels).
# Height is converted from cm to m in the same step.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

# Raw pregnancy codes, shown before recoding.
table(df$is_pregnant)
df[df == "Inf"] <- NA # inf means missing
# Pregnancy: 1 stays 1, any other non-missing code becomes 0; rows with
# sex == 1 are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so it is only tabulated.
table(df$is_urban) # recode urban = 1, rural = 0 if necessary



### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible measurements to NA: non-positive values and anything at or
# above the upper cut-off. The cut-offs also cover the "888"/"999" codes
# (8.88 / 9.99 once height has been divided by 100).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recodes: 1 stays 1, other non-missing codes become 0.
# self_hyper is forced to 0 wherever the recoded bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(
      self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)
    )
  )

# Blood pressure readings of 888 or more are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


# Diabetes items follow the same pattern as the hypertension items.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df)
summary(df)


### SAVE FINAL DATASET
# The extract built in this section has 32 columns (the template comment said
# 22); the number of observations should be the same as in `data`.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, file = "Lesotho_2012.csv", row.names = FALSE)

# Drop both objects so nothing carries over into the next country section.
rm(list = c("data", "df"))



### Liberia

# Read the raw STEPS export for this survey.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("lbr2011.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised extract with with() instead of attach()/detach():
# column names are resolved inside `data` for this one call only, so a failed
# call cannot leave `data` on the search path and a same-named global object
# cannot shadow a survey column.
df <- with(data, data.frame(
  study_id  = "LBR_2011_STEPS_v01", # STEPS id
  country   = "Liberia", # full country name
  region    = "Africa", # full WHO region name
  data_year = 2011, # year of data collection (latest year if it spans two)
  coverage  = "National", # "National" or "Subnational"

  # Survey design variables.
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable is extracted here
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm
  waist  = m7,

  # Three blood pressure readings (systolic / diastolic).
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history items.
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1 and every other non-missing value becomes 2
# (always verify that men = 1 and women = 2 against the raw labels).
# Height is converted from cm to m in the same step.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

# Raw pregnancy codes, shown before recoding.
table(df$is_pregnant)
df[df == "Inf"] <- NA # inf means missing
# Pregnancy: 1 stays 1, any other non-missing code becomes 0; rows with
# sex == 1 are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so it is only tabulated.
table(df$is_urban) # recode urban = 1, rural = 0 if necessary



### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible measurements to NA: non-positive values and anything at or
# above the upper cut-off. The cut-offs also cover the "888"/"999" codes
# (8.88 / 9.99 once height has been divided by 100).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recodes: 1 stays 1, other non-missing codes become 0.
# self_hyper is forced to 0 wherever the recoded bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(
      self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)
    )
  )

# Blood pressure readings of 888 or more are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


# Diabetes items follow the same pattern as the hypertension items.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df) # check your changes
summary(df)


### SAVE FINAL DATASET
# The extract built in this section has 32 columns (the template comment said
# 22); the number of observations should be the same as in `data`.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Liberia_2011.csv", row.names = FALSE)

# Clean up like every other country section does, so this survey's objects
# cannot leak into the next section if its load step fails.
rm(data, df)



### Libya

# Read the raw STEPS export for this survey.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("lby2009.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised extract with with() instead of attach()/detach():
# column names are resolved inside `data` for this one call only, so a failed
# call cannot leave `data` on the search path and a same-named global object
# cannot shadow a survey column.
df <- with(data, data.frame(
  study_id  = "LBY_2009_STEPS_v01", # STEPS id
  country   = "Libya", # full country name
  region    = "Eastern Mediterranean", # full WHO region name
  data_year = 2009, # year of data collection (latest year if it spans two)
  coverage  = "National", # "National" or "Subnational"

  # Survey design variables.
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable is extracted here
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm
  waist  = m7,

  # Three blood pressure readings (systolic / diastolic).
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history items.
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1 and every other non-missing value becomes 2
# (always verify that men = 1 and women = 2 against the raw labels).
# Height is converted from cm to m in the same step.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

# Raw pregnancy codes, shown before recoding.
table(df$is_pregnant)
df[df == "Inf"] <- NA # inf means missing
# Pregnancy: 1 stays 1, any other non-missing code becomes 0; rows with
# sex == 1 are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so it is only tabulated.
table(df$is_urban) # recode urban = 1, rural = 0 if necessary



### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible measurements to NA: non-positive values and anything at or
# above the upper cut-off. The cut-offs also cover the "888"/"999" codes
# (8.88 / 9.99 once height has been divided by 100).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recodes: 1 stays 1, other non-missing codes become 0.
# self_hyper is forced to 0 wherever the recoded bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(
      self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)
    )
  )

# Blood pressure readings of 888 or more are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


# Diabetes items follow the same pattern as the hypertension items.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df)
summary(df)


### SAVE FINAL DATASET
# The extract built in this section has 32 columns (the template comment said
# 22); the number of observations should be the same as in `data`.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, file = "Libya_2009.csv", row.names = FALSE)

# Drop both objects so nothing carries over into the next country section.
rm(list = c("data", "df"))


### Marshall Islands

# Read the raw STEPS export for this survey.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("mhl2017.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised extract with with() instead of attach()/detach():
# column names are resolved inside `data` for this one call only, so a failed
# call cannot leave `data` on the search path and a same-named global object
# cannot shadow a survey column.
df <- with(data, data.frame(
  study_id  = "MHL_2017_STEPS_v01", # STEPS id
  country   = "Marshall Islands", # full country name
  region    = "Western Pacific", # full WHO region name
  # Year of data collection (latest year if it spans two).
  # NOTE(review): the study id and file name say 2017 - confirm that data
  # collection ran into 2018.
  data_year = 2018,
  coverage  = "National", # "National" or "Subnational"

  # No survey design variables are extracted for this survey.
  psu     = NA,
  stratum = NA,
  wstep1  = NA,
  wstep2  = NA,
  wstep3  = NA,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = NA, # no upper age target recorded

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = c1,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable is extracted here
  is_pregnant    = xm8preg,

  weight = m12, # kg
  height = m11, # cm
  waist  = NA, # no waist variable is extracted here

  # Three blood pressure readings (systolic / diastolic).
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # Hypertension and diabetes history items.
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex is not recoded in this section (always verify that men = 1 and
# women = 2 in the raw variable).

# Height is converted from cm to m.
df <- df %>% mutate(height = height / 100)

# Raw pregnancy codes, shown before recoding.
table(df$is_pregnant)
df[df == "Inf"] <- NA # inf means missing
# Pregnancy: codes 1 and 3 become 1, any other non-missing code becomes 0;
# rows with sex == 1 are always set to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(
      sex == 1, 0, ifelse(is_pregnant == 1 | is_pregnant == 3, 1, 0)
    )
  )
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so it is only tabulated.
table(df$is_urban) # recode urban = 1, rural = 0 if necessary



### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible measurements to NA: non-positive values and anything at or
# above the upper cut-off. The cut-offs also cover the "888"/"999" codes
# (8.88 / 9.99 once height has been divided by 100).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured: 1 stays 1, 99 becomes NA, other non-missing codes become 0.
# self_hyper: 1 stays 1, other non-missing codes become 0, and it is forced
# to 0 wherever the recoded bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, ifelse(bp_measured == 99, NA, 0)),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(
      self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)
    )
  )

# Blood pressure readings of 888 or more are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


# Diabetes items follow the same pattern, including 99 -> NA for
# glucose_measured.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(
      glucose_measured == 1, 1, ifelse(glucose_measured == 99, NA, 0)
    ),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df)
summary(df)


### SAVE FINAL DATASET
# The extract built in this section has 32 columns (the template comment said
# 22); the number of observations should be the same as in `data`.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
# NOTE(review): this file name spells "islands" in lowercase, unlike the
# country value - confirm downstream readers expect this spelling.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all") # modify according to your PC
write.csv(df, file = "Marshall_islands_2018.csv", row.names = FALSE)

# Drop both objects so nothing carries over into the next country section.
rm(list = c("data", "df"))


### Mozambique

# Read the raw STEPS export for this survey.
setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("moz2005.csv") # modify according to the STEPS file name

### EXTRACT DATA
# Build the harmonised extract with with() instead of attach()/detach():
# column names are resolved inside `data` for this one call only, so a failed
# call cannot leave `data` on the search path and a same-named global object
# cannot shadow a survey column.
df <- with(data, data.frame(
  study_id  = "MOZ_2005_STEPS_v01", # STEPS id
  country   = "Mozambique", # full country name
  region    = "Africa", # full WHO region name
  data_year = 2005, # year of data collection (latest year if it spans two)
  coverage  = "National", # "National" or "Subnational"

  # No psu/stratum is extracted; the same `analysisweight` column is used
  # for all three step weights.
  psu     = NA,
  stratum = NA,
  wstep1  = analysisweight,
  wstep2  = analysisweight,
  wstep3  = analysisweight,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  participant_id = pid, # if not available use 1:nrow(data)
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable is extracted here
  is_pregnant    = m5,

  weight = m4, # kg
  height = m3, # cm
  waist  = m7,

  # Three blood pressure readings (systolic / diastolic).
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history items.
  bp_measured      = h1,
  self_hyper       = h2,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Sex: the label "Men" becomes 1 and every other non-missing value becomes 2
# (always verify that men = 1 and women = 2 against the raw labels).
# Height is converted from cm to m in the same step.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

# Raw pregnancy codes, shown before recoding.
table(df$is_pregnant)
df[df == "Inf"] <- NA # inf means missing
# Pregnancy: 1 stays 1, any other non-missing code becomes 0; rows with
# sex == 1 are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so it is only tabulated.
table(df$is_urban) # recode urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Set implausible measurements to NA: non-positive values and anything at or
# above the upper cut-off. The cut-offs also cover the "888"/"999" codes
# (8.88 / 9.99 once height has been divided by 100).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured: codes 1, 2 and 3 become 1, other non-missing codes become 0.
# self_hyper: 1 stays 1, other non-missing codes become 0, and it is forced
# to 0 wherever the recoded bp_measured is 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(
      bp_measured == 1 | bp_measured == 2 | bp_measured == 3, 1, 0
    ),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(
      self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)
    )
  )

# Blood pressure readings of 888 or more are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


# Diabetes items: 1 stays 1, other non-missing codes become 0, with the same
# forcing to 0 as for the hypertension items.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
# Open the recoded data for a visual check, then print per-column summaries.
# Remaining abnormal values should be converted to NA ("inf" is acceptable).
View(df)
summary(df)


### SAVE FINAL DATASET
# The extract built in this section has 32 columns (the template comment said
# 22); the number of observations should be the same as in `data`.
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all") # modify according to your PC
write.csv(df, file = "Mozambique_2005.csv", row.names = FALSE)

# Drop both objects so nothing carries over into the next country section.
rm(list = c("data", "df"))


### Myanmar

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("MMR2014.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "MMR_2014_STEPS_v01", # STEPS id
  country   = "Myanmar",            # full country name
  region    = "Southeast Asia",     # WHO region
  data_year = 2014,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  # Sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m8,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m12,
  height = m11,
  waist  = m14,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # Hypertension and diabetes history questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured and self_hyper become 1 = yes, 0 = any other answer; a
# participant whose blood pressure was never measured gets self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment only counts for participants with self-reported hypertension;
# blood pressure readings coded 888 or above are treated as missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 = yes, 0 = any other answer, and a
# diagnosis is set to 0 when glucose was never measured.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/Data/STEPS all" - confirm which output folder
# is intended.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Myanmar_2014.csv", row.names = FALSE)

rm(list = c("data", "df"))


### Namibia

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("nam2005.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "NAM_2005_STEPS_v01", # STEPS id
  country   = "Namibia",            # full country name
  region    = "Africa",             # WHO region
  data_year = 2005,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  # Sampling design and step weights (only wstep2 is taken from the file)
  psu     = psu,
  stratum = stratum,
  wstep1  = NA,
  wstep2  = wstep2,
  wstep3  = NA,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m4,
  height = m3,
  waist  = m7,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history: only drug_hyper is extracted here,
  # the remaining indicators are set to NA for this survey.
  bp_measured      = NA,
  self_hyper       = NA,
  drug_hyper       = m14,
  glucose_measured = NA,
  self_diabetes    = NA,
  drug_diabetes    = NA
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured and self_hyper were extracted as NA for this survey, so these
# recodes leave them as NA; they are kept for parity with the other sections.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper: 1 = yes, 0 = any other answer (not conditioned on self_hyper
# in this section). Blood pressure readings coded 888 or above are missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )

### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/STEPS all" (no "Data" folder) - confirm which
# output folder is intended.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Namibia_2005.csv", row.names = FALSE)

rm(list = c("data", "df"))

### Nauru

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("NRU2015.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "NRU_2015_STEPS_v01", # STEPS id
  country   = "Nauru",              # full country name
  region    = "Western Pacific",    # WHO region
  data_year = 2016,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  # Sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = 69, # oldest age targeted by the survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m8,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m12,
  height = m11,
  waist  = m14,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # Hypertension and diabetes history questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured and self_hyper become 1 = yes, 0 = any other answer; a
# participant whose blood pressure was never measured gets self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment only counts for participants with self-reported hypertension;
# blood pressure readings coded 888 or above are treated as missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 = yes, 0 = any other answer, and a
# diagnosis is set to 0 when glucose was never measured.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/Data/STEPS all" - confirm which output folder
# is intended.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Nauru_2016.csv", row.names = FALSE)

rm(list = c("data", "df"))

###Niger

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("ner2007.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "NER_2007_STEPS_v01", # STEPS id
  country   = "Niger",              # full country name
  region    = "Africa",             # WHO region
  data_year = 2007,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  # Sampling design and step weights: all set to NA for this survey
  psu     = NA,
  stratum = NA,
  wstep1  = NA,
  wstep2  = NA,
  wstep3  = NA,

  survey_min_age = 15, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m4,
  height = m3,
  waist  = m7,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history questions
  bp_measured      = h1,
  self_hyper       = h2,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured: answers 1, 2 and 3 all count as "measured" (1), any other
# answer as 0. self_hyper: 1 = yes, 0 = any other answer, and forced to 0
# when blood pressure was never measured.
df <- df %>%
  mutate(
    bp_measured = ifelse(
      bp_measured == 1 | bp_measured == 2 | bp_measured == 3, 1, 0
    ),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment only counts for participants with self-reported hypertension;
# blood pressure readings coded 888 or above are treated as missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 = yes, 0 = any other answer, and a
# diagnosis is set to 0 when glucose was never measured.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/STEPS all" (no "Data" folder) - confirm which
# output folder is intended.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Niger_2007.csv", row.names = FALSE)

rm(list = c("data", "df"))


### Niue

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("niu2011.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "NIU_2011_STEPS_v01", # STEPS id
  country   = "Niue",               # full country name
  region    = "Western Pacific",    # WHO region
  data_year = 2012,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  # Sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 15, # youngest age targeted by the survey
  survey_max_age = NA, # no upper age limit recorded for this survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m4,
  height = m3,
  waist  = m7,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured and self_hyper become 1 = yes, 0 = any other answer; a
# participant whose blood pressure was never measured gets self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment only counts for participants with self-reported hypertension;
# blood pressure readings coded 888 or above are treated as missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 = yes, 0 = any other answer, and a
# diagnosis is set to 0 when glucose was never measured.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/Data/STEPS all" - confirm which output folder
# is intended.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Niue_2012.csv", row.names = FALSE)

rm(list = c("data", "df"))



###Palau

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("plw2011.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "PLW_2011_STEPS_v01", # STEPS id
  country   = "Palau",              # full country name
  region    = "Western Pacific",    # WHO region
  data_year = 2013,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  # Sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m4,
  height = m3,
  waist  = m7,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured and self_hyper become 1 = yes, 0 = any other answer; a
# participant whose blood pressure was never measured gets self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment only counts for participants with self-reported hypertension;
# blood pressure readings coded 888 or above are treated as missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 = yes, 0 = any other answer, and a
# diagnosis is set to 0 when glucose was never measured.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/Data/STEPS all" - confirm which output folder
# is intended.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Palau_2013.csv", row.names = FALSE)

rm(list = c("data", "df"))



###Qatar

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("Qatar2012.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Build the harmonised data frame for this survey. Columns are resolved with
# with() instead of attach()/detach(): with attach(), objects in the global
# environment mask same-named columns, and if data.frame() fails the detach()
# call is never reached, leaving `data` on the search path for later sections.
df <- with(data, data.frame(
  # Survey-level constants (length 1, recycled to every row)
  study_id  = "QAT_2012_STEPS_v01",    # STEPS id
  country   = "Qatar",                 # full country name
  region    = "Eastern Mediterranean", # WHO region
  data_year = 2012,                    # year of data collection (latest if two)
  coverage  = "National",              # "National" or "Subnational"

  # Sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted by the survey
  survey_max_age = 64, # oldest age targeted by the survey

  # Demographics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # Anthropometry (weight expected in kg, height in cm, per the template)
  weight = m4,
  height = m3,
  waist  = m7,

  # Three systolic / diastolic blood pressure readings
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # Hypertension and diabetes history questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2 (always verify): "Men" maps to 1 and
# every other non-missing label to 2. Height is assumed to arrive in cm and
# is converted to metres.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Inf marks a missing value anywhere in the data frame.
df[df == "Inf"] <- NA
# is_pregnant ends up as 1 = pregnant, 0 = not pregnant; men are forced to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode to urban = 1, rural = 0 if necessary


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and everything from the 888 code upwards (which also
# covers 999) become NA. Height is already in metres here, so a single
# cut-off at 5 m removes its converted codes (8.88, 9.99) as well.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# bp_measured and self_hyper become 1 = yes, 0 = any other answer; a
# participant whose blood pressure was never measured gets self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment only counts for participants with self-reported hypertension;
# blood pressure readings coded 888 or above are treated as missing.
df <- df %>%
  mutate(
    drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)),
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 = yes, 0 = any other answer, and a
# diagnosis is set to 0 when glucose was never measured.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(
      glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0)
    )
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(
      self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)
    )
  )


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert remaining abnormal values to NA; an "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction step builds 32 columns; the number of rows must match "data".
# NOTE(review): other sections of this script save to
# "~/Desktop/Artículos/STEPS/Data/STEPS all" - confirm which output folder
# is intended.
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
# File name convention: full country name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Qatar_2012.csv", row.names = FALSE)

rm(list = c("data", "df"))




### Republic of Moldova

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("mda2013.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  study_id  = "MDA_2013_STEPS_v01",    # STEPS study id
  country   = "Republic of Moldova",   # full country name
  region    = "Europe",                # WHO region
  data_year = 2013,                    # year of data collection (latest if two)
  coverage  = "National",              # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   # youngest target age
  survey_max_age = 69,   # oldest target age

  participant_id = pid,          # use seq_len(nrow(data)) if a survey has no id
  sex            = c1,
  age            = age,          # years
  is_urban       = urbanrural,   # recoded to urban = 1, rural = 0 below
  is_pregnant    = m8,

  weight = m12,   # expected in kg
  height = m11,   # expected in cm; divided by 100 in the recode step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2; this survey is only inspected here,
# not recoded.
table(df$sex, useNA = "always")

# Height: cm -> m.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)   # raw coding, before the recode
# Target coding: 1 = pregnant, 0 = any other non-missing answer; men are
# always 0.
df[df == "Inf"] <- NA   # Inf marks a missing value
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)
# "U" becomes 1 (urban); every other non-missing value becomes 0 (rural).
df <- df %>% mutate(is_urban = ifelse(is_urban == "U", 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 missing-value codes become NA.
# Height is already in metres, so its codes read 8.88 / 9.99; the >= 5 m
# cut-off covers both.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes, 0 = any other non-missing answer, then
# set self_hyper to 0 wherever bp_measured is 0 (rows with a missing
# bp_measured end up NA).
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 1/0 collapse; drug_hyper is set to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood-pressure readings of 888 or more are missing-value codes.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same recode pattern as the BP questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
View(df)      # eyeball the recoded columns
summary(df)   # look for remaining implausible values and recode them to NA


### SAVE FINAL DATASET
# df should hold the 32 extracted columns and as many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # machine-specific output folder
write.csv(df, file = "Republic_Moldova_2013.csv", row.names = FALSE)

# Drop both objects so nothing leaks into the next country's section.
rm(data, df)




### Rwanda

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("rwanda2012.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  study_id  = "RWA_2012_STEPS_v01",   # STEPS study id
  country   = "Rwanda",               # full country name
  region    = "Africa",               # WHO region
  # NOTE(review): study id and file name say 2012; 2013 presumably follows
  # the "use the latest year" rule for a 2012-2013 survey -- confirm.
  data_year = 2013,
  coverage  = "National",             # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   # youngest target age
  survey_max_age = 69,   # oldest target age

  participant_id = pid,   # use seq_len(nrow(data)) if a survey has no id
  sex            = sex,
  age            = age,   # years
  is_urban       = NA,    # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  weight = m4,   # expected in kg
  height = m3,   # expected in cm; divided by 100 in the recode step
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding: men = 1, women = 2 (any non-missing value other than "Men"
# becomes 2). Height: cm -> m.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)   # raw coding, before the recode
# Target coding: 1 = pregnant, 0 = any other non-missing answer; men are
# always 0.
df[df == "Inf"] <- NA   # Inf marks a missing value
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # is_urban was extracted as NA for this survey


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 missing-value codes become NA.
# Height is already in metres, so its codes read 8.88 / 9.99; the >= 5 m
# cut-off covers both.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes, 0 = any other non-missing answer, then
# set self_hyper to 0 wherever bp_measured is 0 (rows with a missing
# bp_measured end up NA).
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 1/0 collapse; drug_hyper is set to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood-pressure readings of 888 or more are missing-value codes.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same recode pattern as the BP questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
View(df)      # eyeball the recoded columns
summary(df)   # look for remaining implausible values and recode them to NA


### SAVE FINAL DATASET
# df should hold the 32 extracted columns and as many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # machine-specific output folder
# File name follows the "<Country>_<year>.csv" convention; the previous
# spelling "Rwamda_2013.csv" was a typo. Any downstream code that opens the
# old name must be updated to match.
write.csv(df, file = "Rwanda_2013.csv", row.names = FALSE)

# Drop both objects so nothing leaks into the next country's section.
rm(data, df)



### Samoa

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("samoa2013.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  study_id  = "WSM_2013_STEPS_v01",   # STEPS study id
  country   = "Samoa",                # full country name
  region    = "Western Pacific",      # WHO region
  data_year = 2013,                   # year of data collection (latest if two)
  coverage  = "National",             # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   # youngest target age
  survey_max_age = 64,   # oldest target age

  participant_id = pid,   # use seq_len(nrow(data)) if a survey has no id
  sex            = sex,
  age            = age,   # years
  is_urban       = NA,    # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  weight = m4,   # expected in kg
  height = m3,   # expected in cm; divided by 100 in the recode step
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding: men = 1, women = 2 (any non-missing value other than "Men"
# becomes 2). Height: cm -> m.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)   # raw coding, before the recode
# Target coding: 1 = pregnant, 0 = any other non-missing answer; men are
# always 0.
df[df == "Inf"] <- NA   # Inf marks a missing value
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # is_urban was extracted as NA for this survey


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 missing-value codes become NA.
# Height is already in metres, so its codes read 8.88 / 9.99; the >= 5 m
# cut-off covers both.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes, 0 = any other non-missing answer, then
# set self_hyper to 0 wherever bp_measured is 0 (rows with a missing
# bp_measured end up NA).
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 1/0 collapse; drug_hyper is set to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood-pressure readings of 888 or more are missing-value codes.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same recode pattern as the BP questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
View(df)      # eyeball the recoded columns
summary(df)   # look for remaining implausible values and recode them to NA


### SAVE FINAL DATASET
# df should hold the 32 extracted columns and as many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # machine-specific output folder
write.csv(df, file = "Samoa_2013.csv", row.names = FALSE)

# Drop both objects so nothing leaks into the next country's section.
rm(data, df)




### Sao Tome and Principe

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("stp2008.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  # NOTE(review): the study id says 2009 while the file name, data_year and
  # the output file all say 2008 -- confirm which year is intended.
  study_id  = "STP_2009_STEPS_v01",      # STEPS study id
  country   = "Sao Tome and Principe",   # full country name
  region    = "Africa",                  # WHO region
  data_year = 2008,                      # year of data collection
  coverage  = "National",                # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   # youngest target age
  survey_max_age = 64,   # oldest target age

  participant_id = pid,   # use seq_len(nrow(data)) if a survey has no id
  sex            = sex,
  age            = age,   # years
  is_urban       = NA,    # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  weight = m4,   # expected in kg
  height = m3,   # expected in cm; divided by 100 in the recode step
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  bp_measured      = h1,
  self_hyper       = h2,    # this file uses h2 / h7, not h2a / h7a
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding: men = 1, women = 2 (any non-missing value other than "Men"
# becomes 2). Height: cm -> m.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)   # raw coding, before the recode
# Target coding: 1 = pregnant, 0 = any other non-missing answer; men are
# always 0.
df[df == "Inf"] <- NA   # Inf marks a missing value
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # is_urban was extracted as NA for this survey


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 missing-value codes become NA.
# Height is already in metres, so its codes read 8.88 / 9.99; the >= 5 m
# cut-off covers both.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes, 0 = any other non-missing answer, then
# set self_hyper to 0 wherever bp_measured is 0 (rows with a missing
# bp_measured end up NA).
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 1/0 collapse; drug_hyper is set to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood-pressure readings of 888 or more are missing-value codes.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same recode pattern as the BP questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
View(df)      # eyeball the recoded columns
summary(df)   # look for remaining implausible values and recode them to NA


### SAVE FINAL DATASET
# df should hold the 32 extracted columns and as many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # machine-specific output folder
write.csv(df, file = "Sao_Tome_and_Principe_2008.csv", row.names = FALSE)

# Drop both objects so nothing leaks into the next country's section.
rm(data, df)



### Seychelles

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("syc2004.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side. Columns set to NA are not extracted for this survey.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  study_id  = "SYC_2004_STEPS_v01",   # STEPS study id
  country   = "Seychelles",           # full country name
  region    = "Africa",               # WHO region
  data_year = 2004,                   # year of data collection (latest if two)
  coverage  = "National",             # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25,   # youngest target age
  survey_max_age = 64,   # oldest target age

  participant_id = pid,   # use seq_len(nrow(data)) if a survey has no id
  sex            = sex,
  age            = age,   # years
  is_urban       = NA,
  is_pregnant    = NA,

  weight = m4,   # expected in kg
  height = m3,   # expected in cm; divided by 100 in the recode step
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # history questions: none extracted for this survey
  bp_measured      = NA,
  self_hyper       = NA,
  drug_hyper       = NA,
  glucose_measured = NA,
  self_diabetes    = NA,
  drug_diabetes    = NA
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding: men = 1, women = 2 (any non-missing value other than "Men"
# becomes 2). Height: cm -> m.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)   # extracted as NA for this survey
# is_pregnant starts as all NA here, so after these two steps men are 0 and
# everyone else stays NA.
df[df == "Inf"] <- NA   # Inf marks a missing value
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # is_urban was extracted as NA for this survey


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 missing-value codes become NA.
# Height is already in metres, so its codes read 8.88 / 9.99; the >= 5 m
# cut-off covers both.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
# The history columns were extracted as NA for this survey, so the recodes
# below leave them NA; they are kept so every country runs the same steps.
table(df$bp_measured)
table(df$self_hyper)
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood-pressure readings of 888 or more are missing-value codes.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (columns are all NA for this survey; same steps as elsewhere)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
View(df)      # eyeball the recoded columns
summary(df)   # look for remaining implausible values and recode them to NA


### SAVE FINAL DATASET
# df should hold the 32 extracted columns and as many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # machine-specific output folder
write.csv(df, file = "Seychelles_2004.csv", row.names = FALSE)

# Drop both objects so nothing leaks into the next country's section.
rm(data, df)




### Sierra Leone

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("sle2009.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  study_id  = "SLE_2009_STEPS_v01",   # STEPS study id
  country   = "Sierra Leone",         # full country name
  region    = "Africa",               # WHO region
  data_year = 2009,                   # year of data collection (latest if two)
  coverage  = "National",             # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = NA,   # no step-3 weight extracted for this survey

  survey_min_age = 25,   # youngest target age
  survey_max_age = 64,   # oldest target age

  participant_id = pid,   # use seq_len(nrow(data)) if a survey has no id
  sex            = sex,
  age            = age,   # years
  is_urban       = NA,    # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  weight = m4,   # expected in kg
  height = m3,   # expected in cm; divided by 100 in the recode step
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding: men = 1, women = 2 (any non-missing value other than "Men"
# becomes 2). Height: cm -> m.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)   # raw coding, before the recode
# Target coding: 1 = pregnant, 0 = any other non-missing answer; men are
# always 0.
df[df == "Inf"] <- NA   # Inf marks a missing value
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban)   # is_urban was extracted as NA for this survey


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 missing-value codes become NA.
# Height is already in metres, so its codes read 8.88 / 9.99; the >= 5 m
# cut-off covers both.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )



## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1 = yes, 0 = any other non-missing answer, then
# set self_hyper to 0 wherever bp_measured is 0 (rows with a missing
# bp_measured end up NA).
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 1/0 collapse; drug_hyper is set to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood-pressure readings of 888 or more are missing-value codes.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same recode pattern as the BP questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
View(df)      # eyeball the recoded columns
summary(df)   # look for remaining implausible values and recode them to NA


### SAVE FINAL DATASET
# df should hold the 32 extracted columns and as many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/STEPS all")   # machine-specific output folder
write.csv(df, file = "Sierra_Leone_2009.csv", row.names = FALSE)

# Drop both objects so nothing leaks into the next country's section.
rm(data, df)



### Sri Lanka

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("LKA2014.csv")   # raw STEPS export for this survey

### EXTRACT DATA
# Build the harmonised 32-column extract: the constants are recycled to every
# row, the other columns are copied from the raw STEPS variables named on the
# right-hand side.
# with() resolves those names inside `data` for this one call only. Unlike
# attach()/detach(), a global object with the same name cannot mask a survey
# column, and a failed call leaves nothing on the search path for the next
# country to pick up by accident.
df <- with(data, data.frame(
  study_id  = "LKA_2014_STEPS_v01",   # STEPS study id
  country   = "Sri Lanka",            # full country name
  region    = "Southeast Asia",       # WHO region
  # NOTE(review): study id and file name say 2014; 2015 presumably follows
  # the "use the latest year" rule for a 2014-2015 survey -- confirm.
  data_year = 2015,
  coverage  = "National",             # "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   # youngest target age
  survey_max_age = 69,   # oldest target age

  participant_id = pid,   # use seq_len(nrow(data)) if a survey has no id
  sex            = sex,
  age            = age,   # years
  is_urban       = NA,    # no urban/rural variable extracted for this survey
  is_pregnant    = m8,

  weight = m12,   # expected in kg
  height = m11,   # expected in cm; divided by 100 in the recode step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # expected coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Sri_Lanka_2015.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)





### Tajikistan

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("tjk2016.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Map the raw survey columns onto the harmonised layout (32 columns). Columns
# are looked up inside `data` through with() instead of attach()/detach():
# nothing is left on the search path if a column is missing and the call
# fails, and same-named workspace objects cannot shadow the survey columns.
df <- with(data, data.frame(
  # survey-level constants, repeated on every row
  study_id  = "TJK_2016_STEPS_v01",
  country   = "Tajikistan",
  region    = "Europe",
  data_year = 2017, # year of data collection (latest year if it spans two)
  coverage  = "National",

  # sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 69,

  # respondent characteristics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable extracted for this survey
  is_pregnant    = m8,

  # anthropometry
  weight = m12, # expected in kg
  height = m11, # expected in cm; divided by 100 in the recode section
  waist  = m14,

  # blood pressure readings (sbp* / dbp*)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # hypertension and diabetes questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # expected coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Tajikistan_2017.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)



### Timor-Leste

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("tls2014.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Map the raw survey columns onto the harmonised layout (32 columns). Columns
# are looked up inside `data` through with() instead of attach()/detach():
# nothing is left on the search path if a column is missing and the call
# fails, and same-named workspace objects cannot shadow the survey columns.
df <- with(data, data.frame(
  # survey-level constants, repeated on every row
  study_id  = "TLS_2014_STEPS_v01",
  country   = "Timor-Leste",
  region    = "Southeast Asia",
  data_year = 2014, # year of data collection (latest year if it spans two)
  coverage  = "National",

  # sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 69,

  # respondent characteristics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable extracted for this survey
  is_pregnant    = m8,

  # anthropometry
  weight = m12, # expected in kg
  height = m11, # expected in cm; divided by 100 in the recode section
  waist  = m14,

  # blood pressure readings (sbp* / dbp*)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # hypertension and diabetes questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # expected coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Timor-Leste_2014.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)


### Togo

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("tgo2010.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Map the raw survey columns onto the harmonised layout (32 columns). Columns
# are looked up inside `data` through with() instead of attach()/detach():
# nothing is left on the search path if a column is missing and the call
# fails, and same-named workspace objects cannot shadow the survey columns.
df <- with(data, data.frame(
  # survey-level constants, repeated on every row
  study_id  = "TGO_2010_STEPS_v01",
  country   = "Togo",
  region    = "Africa",
  data_year = 2011, # year of data collection (latest year if it spans two)
  coverage  = "National",

  # sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 69,

  # respondent characteristics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4, # expected in kg
  height = m3, # expected in cm; divided by 100 in the recode section
  waist  = m7,

  # blood pressure readings (sbp* / dbp*)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension and diabetes questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # expected coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Togo_2011.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)



### Tuvalu

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("TUV2015.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Map the raw survey columns onto the harmonised layout (32 columns). Columns
# are looked up inside `data` through with() instead of attach()/detach():
# nothing is left on the search path if a column is missing and the call
# fails, and same-named workspace objects cannot shadow the survey columns.
df <- with(data, data.frame(
  # survey-level constants, repeated on every row
  study_id  = "TUV_2015_STEPS_v01",
  country   = "Tuvalu",
  region    = "Western Pacific",
  data_year = 2015, # year of data collection (latest year if it spans two)
  coverage  = "National",

  # sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 69,

  # respondent characteristics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable extracted for this survey
  is_pregnant    = m8,

  # anthropometry
  weight = m12, # expected in kg
  height = m11, # expected in cm; divided by 100 in the recode section
  waist  = m14,

  # blood pressure readings (sbp* / dbp*)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # hypertension and diabetes questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # expected coding: urban = 1, rural = 0


### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Tuvalu_2015.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)




### Uganda

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("uga2014.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Map the raw survey columns onto the harmonised layout (32 columns). Columns
# are looked up inside `data` through with() instead of attach()/detach():
# nothing is left on the search path if a column is missing and the call
# fails, and same-named workspace objects cannot shadow the survey columns.
df <- with(data, data.frame(
  # survey-level constants, repeated on every row
  study_id  = "UGA_2014_STEPS_v01",
  country   = "Uganda",
  region    = "Africa",
  data_year = 2014, # year of data collection (latest year if it spans two)
  coverage  = "National",

  # sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 18,
  survey_max_age = 69,

  # respondent characteristics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = stratum, # stratum reused here; recoded to 1/0 further down
  is_pregnant    = m8,

  # anthropometry
  weight = m12, # expected in kg
  height = m11, # expected in cm; divided by 100 in the recode section
  waist  = m14,

  # blood pressure readings (sbp* / dbp*)
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,

  # hypertension and diabetes questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # expected coding: urban = 1, rural = 0
# is_urban holds the stratum code here: 1 is kept as urban, every other
# non-missing code becomes 0.
# NOTE(review): assumes stratum code 1 means urban -- confirm in the codebook.
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))

### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "Uganda_2014.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)



### United Republic of Tanzania

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("tza2012.csv") # raw STEPS file for this survey

### EXTRACT DATA
# Map the raw survey columns onto the harmonised layout (32 columns). Columns
# are looked up inside `data` through with() instead of attach()/detach():
# nothing is left on the search path if a column is missing and the call
# fails, and same-named workspace objects cannot shadow the survey columns.
df <- with(data, data.frame(
  # survey-level constants, repeated on every row
  study_id  = "TZA_2012_STEPS_v01",
  country   = "United Republic of Tanzania",
  region    = "Africa",
  data_year = 2012, # year of data collection (latest year if it spans two)
  coverage  = "National",

  # sampling design and step weights
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  # target age range of the survey
  survey_min_age = 25,
  survey_max_age = 64,

  # respondent characteristics
  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA, # no urban/rural variable extracted for this survey
  is_pregnant    = m5,

  # anthropometry
  weight = m4, # expected in kg
  height = m3, # expected in cm; divided by 100 in the recode section
  waist  = m7,

  # blood pressure readings (sbp* / dbp*)
  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,

  # hypertension and diabetes questions
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3a,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8b
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: the label "Men" maps to 1 and every
# other non-missing label maps to 2. Height is divided by 100 (cm -> m).
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
df[df == "Inf"] <- NA # cells equal to "Inf" are treated as missing
# Target coding is 1 = pregnant, 0 = not pregnant; men are always set to 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban is NA on every row for this survey (no urban/rural variable was
# extracted), so there is nothing to recode to 1/0.
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Set abnormal values to NA: non-positive values and the 888/999 codes (any
# value >= 888) for waist and weight. Height is already in metres here, so
# those codes show up as 8.88/9.99 and every value >= 5 is dropped.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Recode to 0/1 (1 stays 1, any other non-missing code becomes 0). Rows with
# bp_measured == 0 are forced to self_hyper = 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same 0/1 recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Blood pressure readings of 888 or more are codes, not measurements.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## GLUCOSE / DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for hypertension: 0/1 recode, and rows with
# glucose_measured == 0 are forced to self_diabetes = 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0,
                              ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(self_diabetes == 0, 0,
                           ifelse(drug_diabetes == 1, 1, 0))
  )


### FINAL INSPECTION
# View() needs an interactive session; skip it when the script runs in batch.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA); "inf" is acceptable


### SAVE FINAL DATASET
# df should have 32 columns and the same number of rows as "data".
setwd("~/Desktop/Artículos/STEPS/STEPS all") # modify according to your PC
write.csv(df, "United_Republic_of_Tanzania_2012.csv", row.names = FALSE) # <Country>_<year>.csv

rm(data, df)


###Uruguay

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("ury2013.csv") #modify according to the STEPS file name

### EXTRACT DATA
# Map the raw Uruguay STEPS columns onto the harmonised variable names.
# with() evaluates the column names inside `data` for this one call only:
# nothing is left on the search path, and a workspace object that happens to
# share a column's name (e.g. `age`) cannot mask the column, which is possible
# with attach().
df <- with(data, data.frame(
  study_id = "URY_2013_STEPS_v01",  #replace with STEPS id
  country = "Uruguay",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Americas",   #replace with full name of the WHO region (first letter capital)
  data_year = 2014,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  # No psu / stratum columns are extracted for this survey; the single weight
  # column `weightsdi` is used for all three step weights.
  psu     = NA,
  stratum = NA,
  wstep1  = weightsdi,
  wstep2  = weightsdi,
  wstep3  = weightsdi,

  survey_min_age = 15,   #replace with the youngest age target
  survey_max_age = 64,   #replace with the oldest age target

  participant_id = seq_len(nrow(data)),   #no id column used: running row number
  sex            = c1,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  is_urban       = NA,   #replace with urban/rural variable (later change to urban=1, rural=0)
  is_pregnant = m5,

  weight = m4,   #replace with weight variable in kg
  height = m3,   #replace with height variable in cm
  waist = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3a,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8b
  # (no spot urinary sodium variable is extracted in this section)
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# always verify that men=1 and women=2
# NOTE(review): no sex recode is applied in this section, yet the pregnancy
# step below tests `sex == 1` -- confirm that the source column (c1) is
# already coded 1 / 2.

# always verify the units and change if needed // if height is in cm:
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# always verify that is_pregnant is coded like 0 = no, 1 = pregnant, convert with:
df[df == "Inf"] <- NA # Inf means missing
# Any code other than 1 becomes 0; rows with sex == 1 are then forced to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (e.g. anthropometric measures that are negative,
# "888" or "999") to NA. Height is in metres here, hence the smaller cut-offs.
# Arguments of one mutate() are evaluated in order.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0, NA, waist),
    waist  = ifelse(waist == 999, NA, waist),
    waist  = ifelse(waist >= 888, NA, waist),
    height = ifelse(height <= 0, NA, height),
    height = ifelse(height == 999, NA, height),
    height = ifelse(height >= 8.8, NA, height),
    height = ifelse(height >= 5, NA, height),
    weight = ifelse(weight <= 0, NA, weight),
    weight = ifelse(weight == 999, NA, weight),
    weight = ifelse(weight >= 888, NA, weight)
  )



## BLOOD PRESSURE
table(df$bp_measured)
table(df$self_hyper)
# Collapse both questions to 1 / 0 (any code other than 1 becomes 0), then
# force self_hyper to 0 when BP was never measured.
# A missing bp_measured leaves self_hyper as NA.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is 1 / 0 and is set to 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings: values of 888 or above are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same logic as the blood pressure questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens the interactive data viewer, so it is skipped when the script
# is run non-interactively (e.g. with Rscript).
if (interactive()) View(df) #check your changes
summary(df)   #verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction above builds a data frame with 32 variables; the number of
# observations should be the same as "data".
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all")   #modify according to your PC
write.csv(df, "Uruguay_2014.csv", row.names = FALSE)   #create new csv file, use country full name and year, for example "Afghanistan_2018.csv"

# Drop the per-country objects so the next section starts clean.
rm(data, df)

### Vanuatu

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("vut2011.csv") #modify according to the STEPS file name

### EXTRACT DATA
# Map the raw Vanuatu STEPS columns onto the harmonised variable names.
# with() evaluates the column names inside `data` for this one call only:
# nothing is left on the search path, and a workspace object that happens to
# share a column's name (e.g. `age`, `sex`, `psu`) cannot mask the column,
# which is possible with attach().
df <- with(data, data.frame(
  study_id = "VUT_2011_STEPS_v01",  #replace with STEPS id
  country = "Vanuatu",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Western Pacific",   #replace with full name of the WHO region (first letter capital)
  data_year = 2011,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 25,   #replace with the youngest age target
  survey_max_age = 64,   #replace with the oldest age target

  participant_id = pid,   #if not available use seq_len(nrow(data))
  sex            = sex,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  is_urban       = NA,   #replace with urban/rural variable (later change to urban=1, rural=0)
  is_pregnant = m5,

  weight = m4,   #replace with weight variable in kg
  height = m3,   #replace with height variable in cm
  waist = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3a,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8b
  # (no spot urinary sodium variable is extracted in this section)
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# always verify that men=1 and women=2
# "Men" -> 1; every other non-missing label -> 2.
df <- df %>% mutate(sex = ifelse(sex == "Men", 1, 2))

# always verify the units and change if needed // if height is in cm:
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# always verify that is_pregnant is coded like 0 = no, 1 = pregnant, convert with:
df[df == "Inf"] <- NA # Inf means missing
# Any code other than 1 becomes 0; men (sex == 1) are then forced to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary
# is_urban was extracted as NA for this survey, so this recode leaves it NA.
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))

### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (e.g. anthropometric measures that are negative,
# "888" or "999") to NA. Height is in metres here, hence the smaller cut-offs.
# Arguments of one mutate() are evaluated in order.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0, NA, waist),
    waist  = ifelse(waist == 999, NA, waist),
    waist  = ifelse(waist >= 888, NA, waist),
    height = ifelse(height <= 0, NA, height),
    height = ifelse(height == 999, NA, height),
    height = ifelse(height >= 8.8, NA, height),
    height = ifelse(height >= 5, NA, height),
    weight = ifelse(weight <= 0, NA, weight),
    weight = ifelse(weight == 999, NA, weight),
    weight = ifelse(weight >= 888, NA, weight)
  )



## BLOOD PRESSURE
table(df$bp_measured)
table(df$self_hyper)
# Collapse both questions to 1 / 0 (any code other than 1 becomes 0), then
# force self_hyper to 0 when BP was never measured.
# A missing bp_measured leaves self_hyper as NA.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is 1 / 0 and is set to 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings: values of 888 or above are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same logic as the blood pressure questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens the interactive data viewer, so it is skipped when the script
# is run non-interactively (e.g. with Rscript).
if (interactive()) View(df) #check your changes
summary(df)   #verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction above builds a data frame with 32 variables; the number of
# observations should be the same as "data".
# The file is written to the folder that the "POOL NEW DATASET" step reads
# from ("STEPS/Data/STEPS all"). The previous target, "STEPS/STEPS all", is not
# scanned by the pooling step, so this country would have been left out.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all")   #modify according to your PC
write.csv(df, "Vanuatu_2011.csv", row.names = FALSE)   #create new csv file, use country full name and year, for example "Afghanistan_2018.csv"

# Drop the per-country objects so the next section starts clean.
rm(data, df)



### Vietnam

setwd("~/Desktop/STEPS National/all csv")
data <- read.csv("vnm2015.csv") #modify according to the STEPS file name

### EXTRACT DATA
# Map the raw Vietnam STEPS columns onto the harmonised variable names.
# with() evaluates the column names inside `data` for this one call only:
# nothing is left on the search path, and a workspace object that happens to
# share a column's name (e.g. `age`, `sex`, `urban`) cannot mask the column,
# which is possible with attach().
df <- with(data, data.frame(
  study_id = "VNM_2015_STEPS_v01",  #replace with STEPS id
  country = "Vietnam",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Western Pacific",   #replace with full name of the WHO region (first letter capital)
  data_year = 2015,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   #replace with the youngest age target
  survey_max_age = 69,   #replace with the oldest age target

  participant_id = pid,   #if not available use seq_len(nrow(data))
  sex            = sex,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  is_urban       = urban,   #replace with urban/rural variable (later change to urban=1, rural=0)
  is_pregnant = m8,

  weight = m12,   #replace with weight variable in kg
  height = m11,   #replace with height variable in cm
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8
  # (no spot urinary sodium variable is extracted in this section)
))


### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# always verify that men=1 and women=2
# "Men" -> 1; every other non-missing label -> 2.
df <- df %>% mutate(sex = ifelse(sex == "Men", 1, 2))

# always verify the units and change if needed // if height is in cm:
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# always verify that is_pregnant is coded like 0 = no, 1 = pregnant, convert with:
df[df == "Inf"] <- NA # Inf means missing
# Any code other than 1 becomes 0; men (sex == 1) are then forced to 0.
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary
# Code 1 stays 1; any other non-missing code becomes 0.
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))

### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (e.g. anthropometric measures that are negative,
# "888" or "999") to NA. Height is in metres here, hence the smaller cut-offs.
# Arguments of one mutate() are evaluated in order.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0, NA, waist),
    waist  = ifelse(waist == 999, NA, waist),
    waist  = ifelse(waist >= 888, NA, waist),
    height = ifelse(height <= 0, NA, height),
    height = ifelse(height == 999, NA, height),
    height = ifelse(height >= 8.8, NA, height),
    height = ifelse(height >= 5, NA, height),
    weight = ifelse(weight <= 0, NA, weight),
    weight = ifelse(weight == 999, NA, weight),
    weight = ifelse(weight >= 888, NA, weight)
  )



## BLOOD PRESSURE
table(df$bp_measured)
table(df$self_hyper)
# Collapse both questions to 1 / 0 (any code other than 1 becomes 0), then
# force self_hyper to 0 when BP was never measured.
# A missing bp_measured leaves self_hyper as NA.
df <- df %>%
  mutate(
    self_hyper  = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Treatment is 1 / 0 and is set to 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings: values of 888 or above are treated as missing.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES (same logic as the blood pressure questions)
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    self_diabetes    = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### FINAL INSPECTION
# View() opens the interactive data viewer, so it is skipped when the script
# is run non-interactively (e.g. with Rscript).
if (interactive()) View(df) #check your changes
summary(df)   #verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction above builds a data frame with 32 variables; the number of
# observations should be the same as "data".
# The file is written to the folder that the "POOL NEW DATASET" step reads
# from ("STEPS/Data/STEPS all"). The previous target, "STEPS/STEPS all", is not
# scanned by the pooling step, so this country would have been left out.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all")   #modify according to your PC
write.csv(df, "Vietnam_2015.csv", row.names = FALSE)   #create new csv file, use country full name and year, for example "Afghanistan_2018.csv"

# Drop the per-country objects so the next section starts clean.
rm(data, df)

######################################################################
######################################################################


###POOL NEW DATASET
# Read every per-country CSV in the pooling folder and stack them into one
# data frame, `pooleddata`.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS all")

# list.files() takes a regular expression, not a shell glob: "\\.csv$" matches
# only file names that end in ".csv".
datasets <- list.files(pattern = "\\.csv$")
if (length(datasets) == 0) {
  stop("No .csv files found in ", getwd(), call. = FALSE)
}
length(datasets)   # number of datasets about to be pooled

# Read the files straight into a list instead of assign()-ing each one into
# the workspace and collecting them back with ls()/get(); that round-trip
# also swept up any unrelated data frame that happened to be in the workspace.
datalist <- lapply(datasets, read.csv)



### APPEND ALL DATASETS BROUGHT FROM THE FOLDER (54 DATASETS)
# rbind.fill() stacks the data frames and fills columns that are absent from
# a given dataset with NA.
pooleddata <- plyr::rbind.fill(datalist)
# Keep only the pooled data frame in the workspace (as before).
rm(list = setdiff(ls(), c("pooleddata")))





### --- NEW VARIABLES / RECODING
# Blood pressure = mean of the second and third readings
# (NA when either of the two readings is missing).
pooleddata[["sbp"]] <- (pooleddata[["sbp2"]] + pooleddata[["sbp3"]]) / 2
pooleddata[["dbp"]] <- (pooleddata[["dbp2"]] + pooleddata[["dbp3"]]) / 2
# The individual readings are dropped once the means are computed.
pooleddata <- subset(
  pooleddata,
  select = -c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3)
)
# Body mass index: weight divided by height squared.
pooleddata[["bmi"]] <- pooleddata[["weight"]] / pooleddata[["height"]]^2




### --- EXCLUSION CRITERIA
# subset() keeps a row only when its condition is TRUE, so rows for which a
# condition evaluates to NA are dropped as well.

# 1. PEOPLE AGED 15-69
pooleddata <- subset(pooleddata, age >= 15 & age <= 69)

length(table(pooleddata$study_id))

# 2. MISSING WEIGHT OR HEIGHT OR SBP/DBP OR LABS OR PSU/STRATUM/WSTEP2
pooleddata <- subset(
  pooleddata,
  !is.na(weight) & !is.na(height) & !is.na(sbp) & !is.na(dbp)
)
        ## Removing observations without wstep2 except for: Niger and Marshall Islands (they don't come with wstep2)
pooleddata <- subset(
  pooleddata,
  !is.na(wstep2) |
    study_id %in% c("NER_2007_STEPS_v01", "MHL_2017_STEPS_v01")
)
        ## Removing observations without psu or stratum except for: Niger, Marshall Islands, Mozambique and Uruguay (they don't come with psu / stratum)
pooleddata <- subset(
  pooleddata,
  (!is.na(psu) |
     study_id %in% c("NER_2007_STEPS_v01", "MHL_2017_STEPS_v01",
                     "MOZ_2005_STEPS_v01", "URY_2013_STEPS_v01")) &
    (!is.na(stratum) |
       study_id %in% c("NER_2007_STEPS_v01", "MHL_2017_STEPS_v01",
                       "MOZ_2005_STEPS_v01", "URY_2013_STEPS_v01"))
)
#
# 3. IMPLAUSIBLE RANGES WEIGHT, HEIGHT, BMI AND SBP/DBP
# Every measurement has to fall inside its plausible window.
pooleddata <- subset(
  pooleddata,
  weight >= 12 & weight <= 300 &
    height >= 1.00 & height <= 2.50 &
    sbp >= 70 & sbp <= 270 &
    dbp >= 30 & dbp <= 150 &
    bmi >= 10 & bmi <= 80
)



# 4. PREGNANT
# Pregnant participants are removed; a missing pregnancy status is kept.
table(pooleddata$is_pregnant, pooleddata$study_id)
pooleddata <- subset(pooleddata, is_pregnant == 0 | is.na(is_pregnant))


### --- EDA: EXPLORATORY DATA ANALYSIS
# Distribution of the core continuous variables after the exclusions.
summary(pooleddata[["age"]])
summary(pooleddata[["weight"]])
summary(pooleddata[["height"]])



# Sex counts overall and per study, with missing values shown explicitly.
table(pooleddata[["sex"]], useNA = "always")
table(pooleddata[["sex"]], pooleddata[["study_id"]], useNA = "always")

# Per-study summaries of weight, height and systolic blood pressure.
tapply(X = pooleddata[["weight"]], INDEX = pooleddata[["study_id"]], FUN = summary)
tapply(X = pooleddata[["height"]], INDEX = pooleddata[["study_id"]], FUN = summary)
tapply(X = pooleddata[["sbp"]], INDEX = pooleddata[["study_id"]], FUN = summary)

summary(pooleddata)
###############################################################################################
# Number of distinct studies left in the pooled data.
length(table(pooleddata[["study_id"]]))

### - SAVE ONE COMPLETE DATASET
# Export a subset of the pooled columns to a date-stamped CSV.
# Columns are selected by POSITION, so the result depends on the column order
# produced by the pooling step. Check the names() output below before
# trusting the file.
# NOTE(review): with the 32-column extraction template and no extra columns,
# these positions would be study_id, country, region, data_year, psu, stratum,
# wstep2, participant_id, sex, age, weight, height, sbp, dbp, bmi -- TODO
# confirm, and consider selecting by name instead.
names(pooleddata)
write.csv(
  pooleddata[, c(1:4, 6:7, 9, 13:15, 18:19, 27:29)],
  paste0("~/Desktop/Artículos/STEPS/Data/Extraction_model_application_pooleddata_", Sys.Date(), ".csv"),
  row.names = FALSE,   # spelled out: `F` is an ordinary variable that can be reassigned
  fileEncoding = 'UTF-8'
)






